In [9]:
# ═══════════════════════════════════════════════════════════════════
# 🔄 LOAD TRAINED MODEL
# ═══════════════════════════════════════════════════════════════════

import tensorflow as tf
import os
import numpy as np

print("Loading trained model...")

# Define custom objects
def focal_loss_dynamic(gamma=2.0, eps=1e-7):
    """Build a focal-loss callable whose class weights follow the batch.

    For every output channel the positive-term weight is one minus the mean
    of ``y_true`` over axes 0 and 1, so a channel that is rarely active in
    the current batch gets a larger weight on its positive term.

    Args:
        gamma: Focusing exponent applied to the error probability.
        eps: Clipping margin that keeps predictions away from 0 and 1 before
            the logarithms are taken.

    Returns:
        A ``loss_fn(y_true, y_pred)`` closure that returns the mean loss.
    """
    def loss_fn(y_true, y_pred):
        probs = tf.clip_by_value(y_pred, eps, 1 - eps)
        # Weight of the positive term: complement of the positive frequency.
        pos_weight = 1 - tf.reduce_mean(y_true, axis=[0, 1])
        neg_weight = 1 - pos_weight
        positive_part = - pos_weight * y_true * tf.pow(1 - probs, gamma) * tf.math.log(probs)
        negative_part = - neg_weight * (1 - y_true) * tf.pow(probs, gamma) * tf.math.log(1 - probs)
        return tf.reduce_mean(positive_part + negative_part)
    return loss_fn

def f1_metric_05(y_true, y_pred):
    """Compute the F1 score after binarising predictions at 0.5.

    True positives, false positives and false negatives are summed over every
    element of the inputs; a 1e-7 term in each denominator avoids division
    by zero.
    """
    hard_pred = tf.cast(y_pred > 0.5, tf.float32)
    true_pos = tf.reduce_sum(y_true * hard_pred)
    false_pos = tf.reduce_sum((1 - y_true) * hard_pred)
    false_neg = tf.reduce_sum(y_true * (1 - hard_pred))
    precision = true_pos / (true_pos + false_pos + 1e-7)
    recall = true_pos / (true_pos + false_neg + 1e-7)
    return 2 * precision * recall / (precision + recall + 1e-7)

# Pin the metric's registered name to the key used in custom_objects below.
f1_metric_05.__name__ = "f1_metric_05"

# Load model
MODEL_DIR = "."  # changed to the current directory
model_path = os.path.join(MODEL_DIR, "final_model.h5")

# Names that the saved model refers to and that Keras cannot resolve on its
# own: the loss factory, one loss closure built with the factory's default
# arguments (its function name is "loss_fn"), and the F1 metric.
custom_objects = {
    'focal_loss_dynamic': focal_loss_dynamic,
    'loss_fn': focal_loss_dynamic(),
    'f1_metric_05': f1_metric_05
}

# Both the load and the smoke test run inside one try block, so a failure in
# either is reported as a loading error and leaves `model` set to None.
# NOTE(review): the name `model` therefore exists even after a failure; code
# that uses it later has to test for None, not only for the name.
try:
    model = tf.keras.models.load_model(model_path, custom_objects=custom_objects)
    print(f"✓ Model loaded successfully ({model.count_params():,} parameters)")

    # Quick test: one random input of shape (1, 64, 100, 1) through predict()
    test_input = np.random.random((1, 64, 100, 1)).astype(np.float32)
    test_output = model.predict(test_input, verbose=0)
    print(f"✓ Model ready for inference")

except Exception as e:
    print(f"✗ Error loading model: {e}")
    model = None

Loading trained model...
✓ Model loaded successfully (876,851 parameters)
✓ Model loaded successfully (876,851 parameters)
✓ Model ready for inference
✓ Model ready for inference


In [8]:
"""
Create FULL FILES with multiple noises - WHOLE FILE APPROACH
-----------------------------------------------------------
• Each file: 10-15 noises, with types drawn according to the MIX distribution
• Each noise: 3-4 seconds
• Precise labels: a separate record for every noise, with exact times
• Output:
      test_dataset/data/*.wav (whole files with noises applied)
      test_dataset/labels.csv (improved format)
"""
import os, random, csv, uuid
from pathlib import Path
from typing import Dict, List
from pydub import AudioSegment

# ───────── CONFIG ────────────────────────────────────────────────
WAV_DIR        = Path(r"F:\DEPP\data2\thebest\test\wav")  # source test recordings
OUTPUT_DIR     = Path("test_dataset")                      # test dataset folder
MIN_NOISES_PER_FILE = 10                                   # minimum noises per file
MAX_NOISES_PER_FILE = 15                                   # maximum noises per file

# Every noise lasts 3-4 seconds (exactly as in training!)
NOISE_MIN, NOISE_MAX = 3.0, 4.0

# Same noise-type distribution as in training: relative weight per type
MIX: Dict[str, float] = {
    "compression_artifact": 0.50,
    "volume_drop":          0.35,
    "signal_loss":          0.15,
}

# Spacing between noises
MIN_NOISE_SPACING = 15.0  # minimum of 15 seconds between noise start times

# Output locations; the data directory is created on the spot if missing.
DATA_DIR  = OUTPUT_DIR / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)
LABEL_CSV = OUTPUT_DIR / "labels.csv"

# ───────── NOISE OPERATIONS (זהה לאימון!) ────────────────────
def apply_signal_loss(audio, start_s, end_s):
    """Replace the span ``[start_s, end_s)`` of ``audio`` with silence.

    The silent gap is sized from the same millisecond boundaries that are
    used to cut ``audio``, so the returned segment is exactly as long as the
    input and later events stay aligned with their labels. It is also created
    at the source frame rate instead of pydub's default for ``silent()``, so
    joining the pieces does not have to reconcile two different rates.

    Args:
        audio: pydub ``AudioSegment`` to corrupt.
        start_s: Start of the dropout, in seconds.
        end_s: End of the dropout, in seconds.

    Returns:
        A new segment: untouched head + silent gap + untouched tail.
    """
    start_ms = int(start_s * 1000)
    end_ms = int(end_s * 1000)
    gap = AudioSegment.silent(duration=end_ms - start_ms,
                              frame_rate=audio.frame_rate)
    return audio[:start_ms] + gap + audio[end_ms:]

def apply_volume_drop(audio, start_s, end_s):
    """Attenuate the span ``[start_s, end_s)`` of ``audio`` by 23 dB.

    The span gets a fade-out followed by a fade-in of equal length before the
    gain reduction is applied; the audio outside the span is left untouched.

    Args:
        audio: Segment to corrupt (sliced in milliseconds).
        start_s: Start of the drop, in seconds.
        end_s: End of the drop, in seconds.

    Returns:
        A new segment: head + attenuated span + tail.
    """
    lo_ms, hi_ms = int(start_s * 1000), int(end_s * 1000)
    head, span, tail = audio[:lo_ms], audio[lo_ms:hi_ms], audio[hi_ms:]
    # Each fade lasts at most 300 ms and never more than half of the span.
    fade_ms = min(300, len(span) // 2)
    quiet = span.fade_out(fade_ms).fade_in(fade_ms) - 23
    return head + quiet + tail

def apply_compression_artifact(audio, start_s, end_s):
    """Degrade the span ``[start_s, end_s)`` of ``audio``.

    The span is converted to a sample width of 1, a frame rate of 5000,
    low-pass filtered at 2000 and reduced by 20 dB, in that order, then
    spliced back between the untouched head and tail.

    Args:
        audio: Segment to corrupt (sliced in milliseconds).
        start_s: Start of the artifact, in seconds.
        end_s: End of the artifact, in seconds.

    Returns:
        A new segment: head + degraded span + tail.
    """
    lo_ms = int(start_s * 1000)
    hi_ms = int(end_s * 1000)

    degraded = audio[lo_ms:hi_ms]
    degraded = degraded.set_sample_width(1)
    degraded = degraded.set_frame_rate(5000)
    degraded = degraded.low_pass_filter(2000)
    degraded = degraded - 20

    return audio[:lo_ms] + degraded + audio[hi_ms:]

# Dispatch table from noise-type name to the function that applies it.
# The keys are the same names used as keys of MIX, so a type drawn from MIX
# can be looked up here directly.
NOISE_FUNCTIONS = {
    "signal_loss":          apply_signal_loss,
    "volume_drop":          apply_volume_drop,
    "compression_artifact": apply_compression_artifact,
}

# ───────── HELPER FUNCTIONS ─────────────────────────────────────
def time_to_mmss(seconds):
    """Format a time in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are dropped; minutes are not wrapped at 60.
    """
    minutes, remainder = divmod(seconds, 60)
    return f"{int(minutes):02d}:{int(remainder):02d}"

def generate_noise_events(audio_duration: float, num_noises: int) -> List[Dict]:
    """Plan the noise events for one whole file.

    For each requested noise, up to 100 random placements are tried. A
    placement is accepted when its start lies at least MIN_NOISE_SPACING
    seconds from every start accepted so far; a noise that never finds a
    valid slot is dropped, so fewer than ``num_noises`` events may come back.

    Args:
        audio_duration: Length of the recording, in seconds.
        num_noises: Number of events to try to place.

    Returns:
        Event dicts with ``start_time``, ``end_time``, ``duration`` (each
        rounded to 3 decimals) and ``noise_type``, ordered by start time.
    """
    kinds, weights = list(MIX.keys()), list(MIX.values())
    events: List[Dict] = []
    taken_starts: List[float] = []

    def _far_enough(candidate):
        return all(abs(candidate - other) >= MIN_NOISE_SPACING for other in taken_starts)

    for _ in range(num_noises):
        attempts_left = 100
        while attempts_left > 0:
            attempts_left -= 1

            length = random.uniform(NOISE_MIN, NOISE_MAX)
            if length >= audio_duration:
                # The noise does not fit in this recording; spend the attempt.
                continue

            begin = random.uniform(0, audio_duration - length)
            if not _far_enough(begin):
                continue

            # The type is drawn only once a slot has been accepted.
            kind = random.choices(kinds, weights=weights, k=1)[0]
            events.append({
                "start_time": round(begin, 3),
                "end_time": round(begin + length, 3),
                "duration": round(length, 3),
                "noise_type": kind
            })
            taken_starts.append(begin)
            break

    return sorted(events, key=lambda ev: ev["start_time"])

# ───────── MAIN PROCESSING ──────────────────────────────────────
# Sorted, case-insensitive discovery: directory listing order is not
# guaranteed, and every file draws from the seeded RNG below, so a fixed
# order is needed for the seed to reproduce the same dataset.
sources = sorted(p for p in WAV_DIR.iterdir() if p.suffix.lower() == ".wav")
if not sources:
    raise RuntimeError(f"No WAV files found in {WAV_DIR}")

# A file is skipped when event placement yields fewer events than this.
MIN_ACCEPTED_NOISES = 8

labels = []
random.seed(42)  # for reproducibility

print(f"🎯 Creating test files with {MIN_NOISES_PER_FILE}-{MAX_NOISES_PER_FILE} noises each...")
print(f"▶ Processing {len(sources)} test WAV files from {WAV_DIR}")
print(f"📊 Noise distribution: {MIX}")

for src_idx, src in enumerate(sources):
    print(f"   📁 Processing file {src_idx+1}/{len(sources)}: {src.name}")

    try:
        # Load the original recording
        audio = AudioSegment.from_file(src)
        audio_duration = len(audio) / 1000.0  # seconds

        print(f"      Duration: {audio_duration:.1f}s")

        # Draw how many noises this file should receive
        num_noises = random.randint(MIN_NOISES_PER_FILE, MAX_NOISES_PER_FILE)

        # Plan the noise events
        noise_events = generate_noise_events(audio_duration, num_noises)

        if len(noise_events) < MIN_ACCEPTED_NOISES:
            print(f"   ⚠️ Skipping {src.name} - only {len(noise_events)} noises generated")
            continue

        print(f"      Adding {len(noise_events)} noise events")

        # Apply the events one after another to the same recording
        noisy_audio = audio
        for event in noise_events:
            start_s = event["start_time"]
            end_s = event["end_time"]
            noise_type = event["noise_type"]

            noisy_audio = NOISE_FUNCTIONS[noise_type](noisy_audio, start_s, end_s)

        # Save the corrupted recording
        output_filename = f"{src.stem}_with_noises.wav"
        output_path = DATA_DIR / output_filename
        noisy_audio.export(output_path, format="wav")
        print(f"      ✅ Saved: {output_filename}")

        # Labels are added only after a successful export: one row per noise
        for event in noise_events:
            labels.append({
                "filename": output_filename,
                "noise_type": event["noise_type"],
                "start_time": event["start_time"],
                "end_time": event["end_time"],
                "duration": event["duration"],
                "start_time_mmss": time_to_mmss(event["start_time"]),
                "end_time_mmss": time_to_mmss(event["end_time"])
            })

    except Exception as e:
        # Best effort: report the failing file and carry on with the others.
        print(f"   ❌ Error processing {src.name}: {e}")
        continue

# ───────── SAVE LABELS CSV ───────────────────────────────────────
fieldnames = ["filename", "noise_type", "start_time", "end_time", "duration", "start_time_mmss", "end_time_mmss"]

with open(LABEL_CSV, "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=fieldnames)
    w.writeheader()
    w.writerows(labels)

# Distinct output files that received at least one label
labelled_files = {label['filename'] for label in labels}

print(f"\n✅ Done! Test files with noises created:")
print(f"   📁 Files dir: {DATA_DIR.resolve()}")
print(f"   📋 Labels: {LABEL_CSV.resolve()}")
print(f"   📊 Total test files: {len(labelled_files)}")
print(f"   📊 Total noise instances: {len(labels)}")
if labels:
    print(f"   📊 Average noises per file: {len(labels) / len(labelled_files):.1f}")
else:
    print("   ⚠️ No labels generated!")

ModuleNotFoundError: No module named 'pydub'

In [2]:
import os
import librosa
import numpy as np
import pandas as pd
import gc
from tqdm import tqdm

# ───────── PATHS ─────────────────────────────────────────────────
AUDIO_DIR   = "test_dataset/data"              # test recordings with injected noises
LABELS_PATH = "test_dataset/labels.csv"        # labels for the test recordings
OUTPUT_DIR  = "preprocessed_test"              # output for the preprocessed test files
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ───────── AUDIO PARAMS ──────────────────────────────────────────
SR         = 16000
N_MELS     = 64
HOP_LENGTH = 160
N_FFT      = 400

# ───────── NOISE MAPPING ─────────────────────────────────────────
# Column of the label matrix used for each noise type.
noise_to_idx = {
    'signal_loss':          0,
    'volume_drop':          1,
    'compression_artifact': 2
}


def _output_paths(wav_name):
    """Return the (features, labels) .npy paths for one source WAV file name."""
    stem = os.path.splitext(wav_name)[0]
    return (os.path.join(OUTPUT_DIR, f"{stem}_X.npy"),
            os.path.join(OUTPUT_DIR, f"{stem}_y.npy"))


print("🔍 Loading test labels and file list...")
labels_df = pd.read_csv(LABELS_PATH)

file_list = sorted([f for f in os.listdir(AUDIO_DIR) if f.endswith(".wav")])
total_files = len(file_list)

# Check how many already exist. A file counts as done only when BOTH outputs
# are present - the same rule the loop below uses to skip - so a file whose
# label matrix is missing is still picked up instead of being reported done.
existing = sum(1 for f in file_list
               if all(os.path.exists(p) for p in _output_paths(f)))

print(f"📊 Found {total_files} test audio files")
print(f"📋 {existing} files already processed")
print(f"🎯 {total_files - existing} files to process")

if total_files == existing:
    print("✅ All test files already processed!")
else:
    print("🚀 Starting sequential processing of test files...")

    successful = 0
    skipped = 0
    errors = []

    # Process files one by one with progress bar
    for filename in tqdm(file_list, desc="Processing test files"):
        x_path, y_path = _output_paths(filename)

        # Skip if already exists
        if os.path.exists(x_path) and os.path.exists(y_path):
            skipped += 1
            continue

        path = os.path.join(AUDIO_DIR, filename)

        try:
            # Load as mono float32 at the fixed sample rate (same as training)
            y_audio, _ = librosa.load(path, sr=SR, mono=True, dtype=np.float32)

            if len(y_audio) < N_FFT:
                errors.append(f"{filename}: too_short")
                continue

            # Mel power spectrogram (exactly like training)
            mel = librosa.feature.melspectrogram(
                y=y_audio, sr=SR, n_fft=N_FFT,
                hop_length=HOP_LENGTH, n_mels=N_MELS,
                power=2.0  # power spectrogram
            )

            # Log conversion and normalization (same as training).
            # ref=np.max makes 0 dB the loudest bin of THIS file.
            log_mel = librosa.power_to_db(mel, ref=np.max)
            log_mel = (log_mel + 80) / 80  # Fixed normalization [-80, 0] -> [0, 1]
            log_mel = np.clip(log_mel, 0, 1)

            T = log_mel.shape[1]

            # Create label matrix (same structure as training): one row per
            # frame, one column per noise type.
            label_matrix = np.zeros((T, 3), dtype=np.float32)
            file_labels = labels_df[labels_df['filename'] == filename]

            for _, row in file_labels.iterrows():
                start_sec, end_sec = float(row['start_time']), float(row['end_time'])
                ntype = row['noise_type']

                if ntype not in noise_to_idx:
                    continue

                start_idx = librosa.time_to_frames(start_sec, sr=SR, hop_length=HOP_LENGTH)
                end_idx = librosa.time_to_frames(end_sec, sr=SR, hop_length=HOP_LENGTH)
                # Clamp to the spectrogram and keep every event at least one frame long
                start_idx = max(0, min(start_idx, T-1))
                end_idx = max(start_idx+1, min(end_idx, T))

                if start_idx < end_idx:
                    label_matrix[start_idx:end_idx, noise_to_idx[ntype]] = 1.0

            # Save as plain .npy files (same format as training)
            np.save(x_path, log_mel.astype(np.float32))
            np.save(y_path, label_matrix)

            successful += 1

            # Memory cleanup every 10 files (more frequent for test)
            if successful % 10 == 0:
                del y_audio, mel, log_mel, label_matrix
                gc.collect()

        except Exception as e:
            # Best effort: remember the failure and continue with the next file.
            errors.append(f"{filename}: {str(e)}")
            continue

    # Final summary
    print(f"\n🎉 Test Processing Complete!")
    print(f"   ✅ Successfully processed: {successful}")
    print(f"   ⏭️ Skipped (already done): {skipped}")
    print(f"   ❌ Errors: {len(errors)}")
    print(f"   📁 Output directory: {os.path.abspath(OUTPUT_DIR)}")

    if errors:
        print(f"\n⚠️ Error details:")
        for error in errors[:5]:  # Show first 5 errors
            print(f"   {error}")
        if len(errors) > 5:
            print(f"   ... and {len(errors)-5} more errors")

# Force cleanup
gc.collect()

print(f"\n✅ Test data ready for model evaluation!")

🔍 Loading test labels and file list...
📊 Found 11 test audio files
📋 0 files already processed
🎯 11 files to process
🚀 Starting sequential processing of test files...


Processing test files: 100%|██████████| 11/11 [00:14<00:00,  1.31s/it]


🎉 Test Processing Complete!
   ✅ Successfully processed: 11
   ⏭️ Skipped (already done): 0
   ❌ Errors: 0
   📁 Output directory: f:\DEPP\data2\thebest\test\preprocessed_test

✅ Test data ready for model evaluation!





In [5]:
# ═══════════════════════════════════════════════════════════════════
# 📊 MODEL TEST EVALUATION ON NEW TEST DATA
# ═══════════════════════════════════════════════════════════════════

import numpy as np
import os

print("🧪 Evaluating model on NEW test data...")

# Setup NEW test data (NOT from training set)
TEST_DATA_DIR = "preprocessed_test"  # our new preprocessed test files
# Feature (*_X.npy) and label (*_y.npy) files are discovered separately,
# each list sorted by file name.
X_test_files = sorted([f for f in os.listdir(TEST_DATA_DIR) if f.endswith("_X.npy")])
y_test_files = sorted([f for f in os.listdir(TEST_DATA_DIR) if f.endswith("_y.npy")])

print(f"📊 Found {len(X_test_files)} test files to evaluate")

def load_test_xy(i, max_t=800):
    """Load the i-th preprocessed test file as a model-ready ``(X, y)`` pair.

    The label file is located from the feature file's own name
    (``<base>_X.npy`` -> ``<base>_y.npy``) rather than by position in a
    second, independently sorted list, so a missing or extra label file
    cannot silently pair features with another recording's labels.

    Args:
        i: Index into ``X_test_files``.
        max_t: Maximum number of frames kept from the start of the file, or
            ``None`` to keep the whole file. The default of 800 is the limit
            this function always applied.
            NOTE(review): with the 10 ms hop configured in the preprocessing
            cell (160 / 16000), 800 frames is only the first 8 seconds of a
            recording - confirm that this is the intended evaluation window.

    Returns:
        ``(X, y)``: float32 features with a trailing channel axis added, and
        float32 labels with one row per frame, both cut to the same length.

    Raises:
        FileNotFoundError: If the matching label file does not exist.
    """
    x_name = X_test_files[i]
    y_name = x_name[:-len("_X.npy")] + "_y.npy"
    X = np.load(os.path.join(TEST_DATA_DIR, x_name)).astype(np.float32)
    y = np.load(os.path.join(TEST_DATA_DIR, y_name)).astype(np.float32)

    # Make sure the labels are laid out as (T, 3)
    if y.shape[0] == 3 and y.shape[1] != 3:
        y = y.T

    actual_T = min(X.shape[1], y.shape[0])
    if max_t is not None:
        actual_T = min(actual_T, max_t)
    X = X[:, :actual_T]
    y = y[:actual_T]

    return X[..., None], y

# Accumulators for the results
all_predictions = []
all_true_labels = []
file_results = []

# Fail once with a clear message instead of once per file inside the loop
# (the loading cell sets `model = None` when loading fails).
if globals().get("model") is None:
    raise RuntimeError("model is not loaded - run the model loading cell first")

# Go over all the new test files
for i, test_file in enumerate(X_test_files):
    print(f"🔄 Processing test file {i+1}/{len(X_test_files)}: {test_file}")

    try:
        X, y_true = load_test_xy(i)
        X_input = X[np.newaxis, :]  # add the batch dimension

        # Predict with the trained model
        y_pred = model.predict(X_input, verbose=0)[0]

        # Align lengths in case of small differences
        min_length = min(y_true.shape[0], y_pred.shape[0])
        y_true = y_true[:min_length]
        y_pred = y_pred[:min_length]

        all_predictions.append(y_pred)
        all_true_labels.append(y_true)

        # Per-file statistics
        file_frames = len(y_true)
        file_noise_frames = np.sum(np.any(y_true > 0.5, axis=1))
        file_results.append({
            'filename': test_file,
            'total_frames': file_frames,
            'noise_frames': file_noise_frames,
            'noise_percentage': (file_noise_frames / file_frames) * 100 if file_frames else 0.0
        })

    except Exception as e:
        print(f"❌ Error processing {test_file}: {e}")
        continue

# Overall analysis of the results
if all_predictions:
    all_pred_concat = np.vstack(all_predictions)
    all_true_concat = np.vstack(all_true_labels)

    # Decision threshold - 0.8 (a high threshold for high precision, as in training)
    THRESHOLD = 0.8
    predictions_binary = (all_pred_concat > THRESHOLD).astype(int)
    true_labels_binary = (all_true_concat > 0.5).astype(int)

    # Frame-level "any noise type active" masks, computed once and reused
    pred_any = np.any(predictions_binary, axis=1)
    true_any = np.any(true_labels_binary, axis=1)

    # Overall statistics
    total_frames = len(all_true_concat)
    frames_clean = np.sum(~true_any)
    frames_with_noise = np.sum(true_any)

    # Confusion counts on the "any noise" decision
    false_positives = np.sum(pred_any & ~true_any)
    false_negatives = np.sum(true_any & ~pred_any)
    true_positives = np.sum(pred_any & true_any)
    true_negatives = np.sum(~pred_any & ~true_any)

    # Metrics
    fp_rate = (false_positives / frames_clean * 100) if frames_clean > 0 else 0
    precision = (true_positives / (true_positives + false_positives)) if (true_positives + false_positives) > 0 else 0
    recall = (true_positives / (true_positives + false_negatives)) if (true_positives + false_negatives) > 0 else 0
    f1_score = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0
    accuracy = (true_positives + true_negatives) / total_frames

    print(f"\n🎯 TEST RESULTS SUMMARY:")
    print(f"=" * 50)
    print(f"📊 Total frames analyzed: {total_frames:,}")
    print(f"🔸 Clean frames: {frames_clean:,} ({(frames_clean/total_frames)*100:.1f}%)")
    print(f"🔶 Frames with noise: {frames_with_noise:,} ({(frames_with_noise/total_frames)*100:.1f}%)")
    print(f"\n🎯 DETECTION PERFORMANCE:")
    print(f"✅ True Positives: {true_positives:,}")
    print(f"✅ True Negatives: {true_negatives:,}")
    print(f"❌ False Positives: {false_positives:,}")
    print(f"❌ False Negatives: {false_negatives:,}")
    print(f"\n📈 METRICS:")
    print(f"🎯 Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"🎯 Precision: {precision:.4f} ({precision*100:.2f}%)")
    print(f"🎯 Recall: {recall:.4f} ({recall*100:.2f}%)")
    print(f"🎯 F1-Score: {f1_score:.4f}")
    print(f"⚠️ False Positive Rate: {fp_rate:.3f}%")

    # Performance per noise type (column order of the label matrix)
    noise_names = ['signal_loss', 'volume_drop', 'compression_artifact']
    print(f"\n🔍 PER-NOISE TYPE PERFORMANCE:")
    print(f"=" * 50)

    total_detected = 0
    total_true = 0

    for class_idx, noise_name in enumerate(noise_names):
        true_noise = true_labels_binary[:, class_idx]
        pred_noise = predictions_binary[:, class_idx]

        tp_noise = np.sum((true_noise == 1) & (pred_noise == 1))
        fp_noise = np.sum((true_noise == 0) & (pred_noise == 1))
        fn_noise = np.sum((true_noise == 1) & (pred_noise == 0))
        true_count = np.sum(true_noise)

        total_detected += tp_noise
        total_true += true_count

        noise_precision = tp_noise / (tp_noise + fp_noise) if (tp_noise + fp_noise) > 0 else 0
        noise_recall = tp_noise / (tp_noise + fn_noise) if (tp_noise + fn_noise) > 0 else 0
        noise_f1 = 2 * noise_precision * noise_recall / (noise_precision + noise_recall) if (noise_precision + noise_recall) > 0 else 0

        print(f"🎯 {noise_name}:")
        # These counts are frames, not separate noise events.
        print(f"   Detected: {tp_noise}/{true_count} frames")
        print(f"   Precision: {noise_precision:.3f}, Recall: {noise_recall:.3f}, F1: {noise_f1:.3f}")

    overall_recall = total_detected / total_true if total_true > 0 else 0
    print(f"\n🏆 OVERALL PERFORMANCE:")
    print(f"🎯 Overall Recall: {overall_recall:.3f} ({overall_recall*100:.1f}%)")
    print(f"⚠️ False Positive Rate: {fp_rate:.3f}%")

    # Performance summary
    print(f"\n📋 PERFORMANCE SUMMARY:")
    print(f"=" * 50)
    if fp_rate < 0.1 and recall > 0.95:
        print(f"🏆 EXCELLENT: Very low FP rate ({fp_rate:.3f}%) and high recall ({recall*100:.1f}%)")
    elif fp_rate < 0.5 and recall > 0.9:
        print(f"✅ VERY GOOD: Low FP rate ({fp_rate:.3f}%) and good recall ({recall*100:.1f}%)")
    elif fp_rate < 1.0 and recall > 0.8:
        print(f"👍 GOOD: Acceptable FP rate ({fp_rate:.3f}%) and recall ({recall*100:.1f}%)")
    else:
        print(f"⚠️ NEEDS IMPROVEMENT: FP rate {fp_rate:.3f}%, recall {recall*100:.1f}%")

    # Report the files that were actually evaluated, not the files discovered.
    print(f"✅ Model evaluation completed on {len(all_predictions)} test files")

else:
    print("❌ No test files processed successfully")

🧪 Evaluating model on NEW test data...
📊 Found 11 test files to evaluate
🔄 Processing test file 1/11: AimeeMullins_2009P_with_noises_X.npy
🔄 Processing test file 2/11: BillGates_2010_with_noises_X.npy
🔄 Processing test file 2/11: BillGates_2010_with_noises_X.npy
🔄 Processing test file 3/11: DanBarber_2010_with_noises_X.npy
🔄 Processing test file 3/11: DanBarber_2010_with_noises_X.npy
🔄 Processing test file 4/11: DanielKahneman_2010_with_noises_X.npy
🔄 Processing test file 4/11: DanielKahneman_2010_with_noises_X.npy
🔄 Processing test file 5/11: EricMead_2009P_with_noises_X.npy
🔄 Processing test file 5/11: EricMead_2009P_with_noises_X.npy
🔄 Processing test file 6/11: GaryFlake_2010_with_noises_X.npy
🔄 Processing test file 6/11: GaryFlake_2010_with_noises_X.npy
🔄 Processing test file 7/11: JamesCameron_2010_with_noises_X.npy
🔄 Processing test file 7/11: JamesCameron_2010_with_noises_X.npy
🔄 Processing test file 8/11: JaneMcGonigal_2010_with_noises_X.npy
🔄 Processing test file 8/11: JaneMc

conda activate tf215
cd F:\DEPP\data2\data3\test
python test_terminal.py


In [6]:
# ╔═══════════════════════════════════════════════════════════════╗
# ⚡ SUPER-FAST CPU INFERENCE (3 files only, vectorized batching)
# ╚═══════════════════════════════════════════════════════════════╝

import os, time, numpy as np, tensorflow as tf

# ── CPU threading optimization
# Leave one core free for the rest of the system.
n_threads = max(1, (os.cpu_count() or 4) - 1)
# NOTE(review): these variables are set after TensorFlow has been imported;
# confirm they still take effect, or set them before the import.
os.environ["OMP_NUM_THREADS"] = str(n_threads)
os.environ["MKL_NUM_THREADS"] = str(n_threads)
try:
    tf.config.threading.set_intra_op_parallelism_threads(n_threads)
    tf.config.threading.set_inter_op_parallelism_threads(n_threads)
except Exception as exc:
    # Still best effort, but no longer silent: the timings printed by this
    # cell are only meaningful if the reader knows which settings applied.
    print(f"⚠️ Could not apply TensorFlow threading settings: {exc}")

# ── Constants
DATA_DIR   = "preprocessed_test"
LABELS     = ["signal_loss", "volume_drop", "compression_artifact"]
THRESHOLDS = np.array([0.8, 0.8, 0.8], dtype=np.float32)  # one per entry of LABELS
SR, HOP = 16000, 160
FRAME_TIME = HOP / SR  # seconds per frame
MIN_FP_SEC = 3.0  # shortest false alarm, in seconds, that is counted
MIN_FP_FRM = int(round(MIN_FP_SEC / FRAME_TIME))  # the same limit in frames

def mmss(sec):
    """Render a time in seconds as zero-padded ``MM:SS`` (fraction dropped)."""
    whole = int(sec)
    return f"{whole // 60:02d}:{whole % 60:02d}"

def find_events(bin_arr):
    """Locate the runs of ones in a 1-D binary array.

    Returns:
        A list of ``(start, end)`` index pairs, one per run, where ``end`` is
        the index just past the last element of the run.
    """
    flags = bin_arr.astype(np.int8)
    # A zero on both sides makes runs that touch either edge produce a
    # rising and a falling change like every other run.
    padded = np.concatenate(([0], flags, [0]))
    change_points = np.flatnonzero(np.diff(padded))
    starts, ends = change_points[0::2], change_points[1::2]
    return list(zip(starts, ends))

# ── Fast compiled forward function (without JIT for compatibility)
@tf.function
def forward_call(batch_64_T1):
    """Run the global ``model`` on one batch with ``training=False``.

    Args:
        batch_64_T1: Input batch; the caller in this cell passes tensors of
            shape (batch, 64, frames, 1).

    Returns:
        Whatever ``model(batch_64_T1, training=False)`` returns.
    """
    return model(batch_64_T1, training=False)

def infer_cpu_vectorized(X_64_T, max_chunk=512, batch_chunks=16):
    """Run the model over a whole spectrogram in fixed-size time chunks.

    The time axis is zero-padded to a multiple of ``max_chunk``, cut into
    chunks with one reshape/transpose, and pushed through ``forward_call`` in
    groups of ``batch_chunks`` chunks to bound memory use. The number of mel
    bins is taken from the input and the number of classes from the model
    output instead of being fixed at 64 and 3.

    No separate warm-up pass is made: it used a batch of one chunk, a shape
    the batches below do not normally have, so it cost a forward pass per
    file without saving any work later.

    Args:
        X_64_T: 2-D array ``(n_mels, T)`` of features.
        max_chunk: Number of frames per chunk.
        batch_chunks: Number of chunks per forward pass.

    Returns:
        float32 array ``(T, n_classes)`` of per-frame model outputs; an empty
        ``(0, 3)`` array when the input has no frames.

    Raises:
        ValueError: If the model does not return one output row per input
            frame, which this chunk-stitching scheme requires.
    """
    n_mels, T = int(X_64_T.shape[0]), int(X_64_T.shape[1])
    if T == 0:
        return np.zeros((0, 3), dtype=np.float32)

    # Pad temporal axis to multiple of max_chunk
    num_chunks = (T + max_chunk - 1) // max_chunk
    pad_T = num_chunks * max_chunk
    if pad_T != T:
        X_pad = np.pad(X_64_T, ((0, 0), (0, pad_T - T)), mode="constant")
    else:
        X_pad = np.asarray(X_64_T)

    # Vectorized reshape: (n_mels, pad_T) -> (num_chunks, n_mels, max_chunk, 1)
    X_blocks = X_pad.reshape(n_mels, num_chunks, max_chunk)
    X_batches_all = np.transpose(X_blocks, (1, 0, 2))[:, :, :, None]

    preds_out = None  # allocated once the class count is known

    # Process in large but manageable bursts for CPU
    for i in range(0, num_chunks, batch_chunks):
        j = min(i + batch_chunks, num_chunks)
        batch = tf.convert_to_tensor(X_batches_all[i:j], dtype=tf.float32)
        yb = forward_call(batch).numpy()

        if yb.ndim != 3 or yb.shape[0] != j - i or yb.shape[1] != max_chunk:
            raise ValueError(
                f"model output shape {yb.shape} does not provide one row per "
                f"input frame (expected ({j - i}, {max_chunk}, n_classes))"
            )
        if preds_out is None:
            preds_out = np.empty((pad_T, yb.shape[-1]), dtype=np.float32)

        preds_out[i*max_chunk : j*max_chunk] = yb.reshape(-1, yb.shape[-1])

    # Drop the rows that belong to the zero padding
    return preds_out[:T]

def analyze_file(base, y_true, probs):
    """Score one file and count its long false-alarm events.

    Predictions are thresholded per class with THRESHOLDS and reduced to a
    per-frame "any noise" decision, as are the labels. Recall is the share of
    labelled-noise time that the model also flags. A false positive is a
    contiguous run of frames where the model flags noise and the labels
    contain none at all, and it is counted only when it lasts at least
    MIN_FP_FRM frames (MIN_FP_SEC seconds).

    Args:
        base: Base name of the file; not used in the computation.
        y_true: ``(T, n_classes)`` label matrix.
        probs: ``(T', n_classes)`` model outputs. If the lengths differ, both
            are cut to the shorter one.

    Returns:
        Number of false-positive events of at least MIN_FP_FRM frames.
    """
    # Compare over the frames both arrays cover; np.asarray also turns
    # memory-mapped inputs into plain arrays.
    n_frames = min(len(y_true), len(probs))
    y_true = np.asarray(y_true[:n_frames])
    probs = np.asarray(probs[:n_frames])

    y_pred = probs > THRESHOLDS[None, :]

    # Labels are treated as active above 0.5, like the frame-level
    # evaluation cell does.
    gt_has_any_noise = np.any(y_true > 0.5, axis=1)
    pred_has_any_noise = np.any(y_pred, axis=1)

    # GT events and recall calculation
    gt_events = find_events(gt_has_any_noise)
    total_gt_sec = sum((e - s) * FRAME_TIME for s, e in gt_events)
    overlap_sec = np.sum(gt_has_any_noise & pred_has_any_noise) * FRAME_TIME
    recall = (overlap_sec / total_gt_sec) if total_gt_sec > 1e-9 else 0.0

    # False positives: model predicts "any noise" where GT has "no noise at all"
    true_fp_mask = pred_has_any_noise & ~gt_has_any_noise
    fp_events = find_events(true_fp_mask)
    fp_count = sum(1 for s, e in fp_events if (e - s) >= MIN_FP_FRM)

    print(f"   ✓ GT events: {len(gt_events)} | Recall: {recall:.1%} | TRUE FP≥{MIN_FP_SEC:.1f}s: {fp_count}")
    return fp_count

# ── Safety check
# The loading cell leaves `model = None` behind when loading fails, so test
# the value and not just whether the name exists.
if globals().get('model') is None:
    raise RuntimeError("⚠️ model not loaded - run model loading cell first")

# Frames per chunk / chunks per forward pass, shared by the banner and the call
CHUNK, BATCH = 512, 16

# ── Process ALL files in directory
label_files = sorted(f for f in os.listdir(DATA_DIR) if f.endswith("_y.npy"))  # ALL FILES!
print(f"🚀 FULL TEST: {len(label_files)} files | CPU threads≈{n_threads}")
print(f"Params: CHUNK={CHUNK}, BATCH={BATCH}, TH={THRESHOLDS.tolist()}")
print(f"✅ CORRECTED FP: Only when predicting noise where GT has NO noise, ≥{MIN_FP_SEC:.1f}s")
print(f"Files: {[f[:-6] for f in label_files]}")

t_all = time.time()
global_fp_total = 0

for idx, y_file in enumerate(label_files, 1):
    base = y_file[:-6]  # strip the "_y.npy" suffix
    x_file = base + "_X.npy"
    # The total comes from the file list; it used to be printed as a fixed "/3".
    print(f"\n▶ [{idx}/{len(label_files)}] {base}")

    try:
        # Memory-map both arrays instead of reading them eagerly
        y_true = np.load(os.path.join(DATA_DIR, y_file), mmap_mode='r')
        X = np.load(os.path.join(DATA_DIR, x_file), mmap_mode='r')
        T = int(X.shape[1])
        print(f"   • Frames: {T} (~{mmss(T*FRAME_TIME)})")

        t0 = time.time()
        probs = infer_cpu_vectorized(X, max_chunk=CHUNK, batch_chunks=BATCH)
        dt = time.time() - t0

        fp_count = analyze_file(base, y_true, probs)
        global_fp_total += fp_count
        print(f"   • Inference: {dt:.1f}s")

    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"   ✗ Error: {e}")

print(f"\n🎯 SUMMARY: Total TRUE FP≥{MIN_FP_SEC:.1f}s: {global_fp_total}")
print(f"⚡ Done in {time.time() - t_all:.1f}s total")
print("✅ Super-fast test completed!")

🚀 FULL TEST: 11 files | CPU threads≈11
Params: CHUNK=512, BATCH=16, TH=[0.800000011920929, 0.800000011920929, 0.800000011920929]
✅ CORRECTED FP: Only when predicting noise where GT has NO noise, ≥3.0s
Files: ['AimeeMullins_2009P_with_noises', 'BillGates_2010_with_noises', 'DanBarber_2010_with_noises', 'DanielKahneman_2010_with_noises', 'EricMead_2009P_with_noises', 'GaryFlake_2010_with_noises', 'JamesCameron_2010_with_noises', 'JaneMcGonigal_2010_with_noises', 'MichaelSpecter_2010_with_noises', 'RobertGupta_2010U_with_noises', 'TomWujec_2010U_with_noises']

▶ [1/3] AimeeMullins_2009P_with_noises
   • Frames: 131822 (~21:58)
   ✓ GT events: 15 | Recall: 79.6% | TRUE FP≥3.0s: 0
   • Inference: 28.9s

▶ [2/3] BillGates_2010_with_noises
   • Frames: 177203 (~29:32)
   ✓ GT events: 15 | Recall: 79.6% | TRUE FP≥3.0s: 0
   • Inference: 28.9s

▶ [2/3] BillGates_2010_with_noises
   • Frames: 177203 (~29:32)
   ✓ GT events: 13 | Recall: 79.4% | TRUE FP≥3.0s: 2
   • Inference: 28.8s

▶ [3/3] DanB

In [7]:
# ═══════════════════════════════════════════════════════════════════
# 🎙️ TRANSCRIBE MISSING TEXT FROM ORIGINAL FILES
# ═══════════════════════════════════════════════════════════════════

import os, time, requests, json
import pandas as pd
from pathlib import Path
from pydub import AudioSegment

print("🎯 Starting transcription of missing text from original files...")

# ───────── AssemblyAI Configuration ────────────────────────────────
# SECURITY: the API key must not be stored in the notebook. It is read from
# the ASSEMBLYAI_API_KEY environment variable. The key that was previously
# hard-coded here has been exposed to everyone with access to this file and
# should be revoked.
API_KEY = os.environ.get("ASSEMBLYAI_API_KEY", "")
if not API_KEY:
    print("⚠️ ASSEMBLYAI_API_KEY is not set - AssemblyAI requests will be rejected")
HDR_UP  = {"authorization": API_KEY}
HDR_TR  = {"authorization": API_KEY, "content-type": "application/json"}
UPL     = "https://api.assemblyai.com/v2/upload"
TRN     = "https://api.assemblyai.com/v2/transcript"

def transcribe_clean_segment(audio_segment: AudioSegment, tag: str,
                             poll_interval: float = 3.0,
                             max_wait: float = 600.0,
                             request_timeout: float = 60.0) -> str:
    """Transcribe a clean audio segment with AssemblyAI and return its text.

    The segment is exported to a temporary WAV file in the working directory,
    uploaded, and the transcript is polled until it completes. The temporary
    file is always removed, even when an error occurs.

    Args:
        audio_segment: The audio to transcribe.
        tag: Label embedded in the temporary file name.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum number of seconds to wait for the transcript.
        request_timeout: Timeout in seconds applied to every HTTP request.

    Returns:
        The word-level texts joined by spaces when the response contains
        words, otherwise the response's ``text`` field ("" when absent/null).

    Raises:
        requests.HTTPError: An endpoint answered with a non-success status.
        RuntimeError: AssemblyAI reported the transcript status "error".
        TimeoutError: The transcript was not finished within ``max_wait``.
    """
    tmp_file = f"tmp_{tag}_{int(time.time())}.wav"
    try:
        # Export temporary file
        audio_segment.export(tmp_file, format="wav")

        # Upload to AssemblyAI
        with open(tmp_file, "rb") as f:
            response = requests.post(UPL, headers=HDR_UP, data=f, timeout=request_timeout)
        # Fail with the real HTTP error instead of a KeyError on the JSON body
        response.raise_for_status()
        upload_url = response.json()["upload_url"]

        # Request transcription
        transcript_request = {
            "audio_url": upload_url,
            "punctuate": True,
            "format_text": True
        }
        response = requests.post(TRN, json=transcript_request, headers=HDR_TR,
                                 timeout=request_timeout)
        response.raise_for_status()
        transcript_id = response.json()["id"]

        # Poll for completion, but never forever
        deadline = time.monotonic() + max_wait
        while True:
            response = requests.get(f"{TRN}/{transcript_id}", headers=HDR_UP,
                                    timeout=request_timeout)
            response.raise_for_status()
            result = response.json()
            status = result["status"]

            if status == "completed":
                words = result.get("words")
                if words:
                    return " ".join(word["text"] for word in words).strip()
                # "text" may be missing or null; treat both as empty
                return (result.get("text") or "").strip()
            if status == "error":
                raise RuntimeError(f"AssemblyAI error: {result.get('error', 'Unknown error')}")
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"AssemblyAI transcript {transcript_id} not finished after {max_wait:.0f}s"
                )

            time.sleep(poll_interval)  # Wait before next poll

    finally:
        # Clean up temporary file
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

# ───────── Paths Configuration ─────────────────────────────────────
# The directory with the original (clean) recordings is machine specific, so it
# can be overridden with the ORIGINAL_WAV_DIR environment variable; the
# previously hard-coded location remains the default.
ORIGINAL_WAV_DIR = Path(os.environ.get("ORIGINAL_WAV_DIR", r"F:\DEPP\data2\thebest\test\wav"))  # original files
TEST_LABELS_PATH = "test_dataset/labels.csv"                # existing labels
OUTPUT_DIR = Path("transcription_dataset")                  # new output folder
OUTPUT_DIR.mkdir(exist_ok=True)

# ───────── Load Existing Labels ────────────────────────────────────
print("📋 Loading existing test labels...")
if not os.path.exists(TEST_LABELS_PATH):
    raise FileNotFoundError(f"Labels file not found: {TEST_LABELS_PATH}")

labels_df = pd.read_csv(TEST_LABELS_PATH)
print(f"   Found {len(labels_df)} noise events in {labels_df['filename'].nunique()} files")

# ───────── Map to Original Files ───────────────────────────────────
print("🔗 Mapping to original WAV files...")
original_files = {}        # original WAV name -> list of label rows for that file
missing_originals = set()  # originals already reported as missing
for _, row in labels_df.iterrows():
    noisy_filename = row['filename']
    # Remove "_with_noises.wav" to get original name
    original_name = noisy_filename.replace("_with_noises.wav", ".wav")
    original_path = ORIGINAL_WAV_DIR / original_name

    if original_path.exists():
        original_files.setdefault(original_name, []).append(row)
    elif original_name not in missing_originals:
        # Warn once per missing file rather than once per labelled event
        missing_originals.add(original_name)
        print(f"   ⚠️ Original file not found: {original_path}")

print(f"   Mapped to {len(original_files)} original files")

# ───────── Noise Type Descriptions ─────────────────────────────────
# Human-readable description stored next to each transcribed segment.
NOISE_DESCRIPTIONS = {
    "signal_loss": "Complete silence – signal lost",
    "volume_drop": "Volume significantly reduced with fade effects", 
    "compression_artifact": "Strong codec artifacts – heavily distorted audio"
}

# ───────── Process Each Original File ──────────────────────────────
def _build_segment_record(original_filename, event, start_time, end_time,
                          noise_type, missing_text, error=None):
    """Build one output row for a labelled noise event.

    The same row layout is used for successful and failed transcriptions so
    the two cannot drift apart. When ``error`` is given, the character count
    is 0 and an extra "error" field carries the message.
    """
    record = {
        "original_filename": original_filename,
        "noisy_filename": event['filename'],
        "noise_type": noise_type,
        "description": NOISE_DESCRIPTIONS.get(noise_type, "Unknown noise type"),
        "start_time": start_time,
        "end_time": end_time,
        "duration": float(event['duration']),
        # Prefer the label file's mm:ss columns; otherwise derive them
        "start_time_mmss": event.get('start_time_mmss', f"{int(start_time//60):02d}:{int(start_time%60):02d}"),
        "end_time_mmss": event.get('end_time_mmss', f"{int(end_time//60):02d}:{int(end_time%60):02d}"),
        "missing_text": missing_text,
        "segment_length_chars": 0 if error is not None else len(missing_text),
        "segment_id": f"{original_filename.replace('.wav', '')}_{start_time:.1f}s"
    }
    if error is not None:
        record["error"] = error
    return record


transcription_results = []
total_segments = sum(len(events) for events in original_files.values())
processed_segments = 0

print(f"\n🎙️ Starting transcription of {total_segments} segments...")

for original_filename, noise_events in original_files.items():
    original_path = ORIGINAL_WAV_DIR / original_filename
    print(f"\n📁 Processing: {original_filename} ({len(noise_events)} segments)")

    try:
        # Load original audio
        original_audio = AudioSegment.from_file(original_path)
        duration_sec = len(original_audio) / 1000.0  # len() of the segment is in milliseconds
        print(f"   Duration: {duration_sec:.1f}s")

        # Process each noise event
        for idx, event in enumerate(noise_events):
            processed_segments += 1
            start_time = float(event['start_time'])
            end_time = float(event['end_time'])
            noise_type = event['noise_type']

            print(f"   🔄 Segment {idx+1}/{len(noise_events)} ({processed_segments}/{total_segments}): "
                  f"{noise_type} {start_time:.2f}s-{end_time:.2f}s")

            try:
                # Extract clean segment from original file (slice indices are milliseconds)
                start_ms = int(start_time * 1000)
                end_ms = int(end_time * 1000)
                clean_segment = original_audio[start_ms:end_ms]

                # Transcribe the clean segment
                tag = f"{original_filename.replace('.wav', '')}_{idx}"
                missing_text = transcribe_clean_segment(clean_segment, tag)

                # Store result
                transcription_results.append(_build_segment_record(
                    original_filename, event, start_time, end_time, noise_type, missing_text
                ))

                print(f"      ✅ Transcribed: '{missing_text[:50]}{'...' if len(missing_text) > 50 else ''}'")

            except Exception as e:
                print(f"      ❌ Error transcribing segment: {e}")
                # Keep a placeholder row so the failure is visible in the outputs
                transcription_results.append(_build_segment_record(
                    original_filename, event, start_time, end_time, noise_type,
                    "[TRANSCRIPTION_ERROR]", error=str(e)
                ))

    except Exception as e:
        # A failure here (e.g. while loading the audio) skips the rest of this file
        print(f"   ❌ Error processing file {original_filename}: {e}")

# ───────── Save Results ────────────────────────────────────────────
print(f"\n💾 Saving results...")

# Tabular copy of every record
results_df = pd.DataFrame(transcription_results)
csv_path = OUTPUT_DIR / "missing_text_transcriptions.csv"
results_df.to_csv(csv_path, index=False, encoding="utf-8")

# JSON copy of the same records, kept human readable
json_path = OUTPUT_DIR / "missing_text_transcriptions.json"
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(transcription_results, f, indent=2, ensure_ascii=False)

# ───────── Summary ─────────────────────────────────────────────────
# Records whose text is not the error placeholder count as successful.
ok_records = [rec for rec in transcription_results if rec["missing_text"] != "[TRANSCRIPTION_ERROR]"]
successful_transcriptions = len(ok_records)
total_chars = sum(rec["segment_length_chars"] for rec in ok_records)

print(f"\n🎉 Transcription Complete!")
print(f"=" * 60)
print(f"📊 Total segments processed: {len(transcription_results)}")
print(f"✅ Successful transcriptions: {successful_transcriptions}")
print(f"❌ Failed transcriptions: {len(transcription_results) - successful_transcriptions}")
print(f"📝 Total characters transcribed: {total_chars:,}")
print(f"📁 Output directory: {OUTPUT_DIR.resolve()}")
print(f"📄 CSV file: {csv_path.resolve()}")
print(f"📄 JSON file: {json_path.resolve()}")

if successful_transcriptions > 0:
    avg_chars = total_chars / successful_transcriptions
    print(f"📊 Average characters per segment: {avg_chars:.1f}")

    # Per-noise-type totals over the successful records only
    noise_stats = {}
    for rec in ok_records:
        bucket = noise_stats.setdefault(rec["noise_type"], {"count": 0, "chars": 0})
        bucket["count"] += 1
        bucket["chars"] += rec["segment_length_chars"]

    print(f"\n📊 Breakdown by noise type:")
    for noise_type, stats in noise_stats.items():
        avg_chars_type = stats["chars"] / stats["count"] if stats["count"] > 0 else 0
        print(f"   {noise_type}: {stats['count']} segments, {avg_chars_type:.1f} avg chars")

print(f"\n✅ Ready for prompt generation and text prediction!")

🎯 Starting transcription of missing text from original files...
📋 Loading existing test labels...
   Found 141 noise events in 11 files
🔗 Mapping to original WAV files...
   Mapped to 11 original files

🎙️ Starting transcription of 141 segments...

📁 Processing: AimeeMullins_2009P.wav (15 segments)
   Duration: 1318.2s
   🔄 Segment 1/15 (1/141): compression_artifact 57.13s-60.40s
      ✅ Transcribed: 'Down, worn out, weakened.'
   🔄 Segment 2/15 (2/141): volume_drop 134.79s-137.93s
      ✅ Transcribed: 'I mean, from this entry, it would.'
   🔄 Segment 3/15 (3/141): compression_artifact 252.83s-256.19s
      ✅ Transcribed: 'An Italian American whose name apparently was.'
   🔄 Segment 4/15 (4/141): volume_drop 305.98s-309.07s
      ✅ Transcribed: 'He came in to my session.'
   🔄 Segment 5/15 (5/141): compression_artifact 365.35s-368.77s
      ✅ Transcribed: 'And athletic person well into the future. This is ...'
   🔄 Segment 6/15 (6/141): compression_artifact 472.00s-475.38s
      ✅ Tran

In [10]:
# ═══════════════════════════════════════════════════════════════════
# 🎯 DETECT & TRANSCRIBE WITH CONTEXT (FIRST FILE ONLY)
# ═══════════════════════════════════════════════════════════════════

import os, time, json, requests, numpy as np, pandas as pd
from pathlib import Path
from pydub import AudioSegment

print("🎯 Detecting noise segments and transcribing with context...")
print("📋 Processing only FIRST file for testing")

# ───────── Paths & Configuration ────────────────────────────────────
NOISY_AUDIO_DIR = Path("test_dataset/data")        # files with noises
PREPROCESSED_DIR = "preprocessed_test"              # preprocessed data
CONTEXT_OUTPUT_DIR = Path("context_transcription")  # new folder for results
CLIPS_DIR = CONTEXT_OUTPUT_DIR / "clips"

# Create output directories
CONTEXT_OUTPUT_DIR.mkdir(exist_ok=True)
CLIPS_DIR.mkdir(exist_ok=True)

# ───────── Model & Detection Parameters ─────────────────────────────
# Use EXACT same parameters as fast inference cell!
SR, HOP = 16000, 160
FRAME_TIME = HOP / SR  # Same as fast inference: 0.01
THRESHOLDS = np.array([0.8, 0.8, 0.8], dtype=np.float32)  # Same as fast inference
MIN_SEGMENT_SEC = 3.0  # Only process segments ≥ 3.0 seconds
MIN_FRAMES = int(round(MIN_SEGMENT_SEC / FRAME_TIME))
MAX_FRAMES = int(round(30.0 / FRAME_TIME))  # Max 30 seconds to avoid very long segments

NOISE_CLASSES = ["signal_loss", "volume_drop", "compression_artifact"]
NOISE_CLASS_IDX = {name: i for i, name in enumerate(NOISE_CLASSES)}  # class name -> prediction column

# ───────── AssemblyAI Configuration ────────────────────────────────
# The API key is read from the environment so that it is never stored in the
# notebook. Export ASSEMBLYAI_API_KEY before starting Jupyter. A key that was
# previously committed in this cell should be treated as leaked and rotated.
API_KEY = os.environ.get("ASSEMBLYAI_API_KEY", "")
if not API_KEY:
    print("⚠️ ASSEMBLYAI_API_KEY is not set - AssemblyAI requests will be rejected")

HDR_UP = {"authorization": API_KEY}
HDR_TR = {"authorization": API_KEY, "content-type": "application/json"}
UPL_EP = "https://api.assemblyai.com/v2/upload"
TRN_EP = "https://api.assemblyai.com/v2/transcript"

def transcribe_with_assemblyai(audio_path: str,
                               poll_interval: float = 3.0,
                               max_wait: float = 600.0,
                               request_timeout: float = 60.0) -> list:
    """Transcribe an audio file with AssemblyAI and return its word entries.

    This is a best-effort helper: any failure (missing file, HTTP error,
    AssemblyAI error status, polling timeout) is printed and an empty list is
    returned instead of raising.

    Args:
        audio_path: Path of the audio file to upload.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum number of seconds to wait for the transcript.
        request_timeout: Timeout in seconds applied to every HTTP request.

    Returns:
        The ``words`` list of the completed transcript ([] when it is missing
        or null), or [] on any error.
    """
    try:
        # Upload file
        with open(audio_path, "rb") as f:
            upload_response = requests.post(UPL_EP, headers=HDR_UP, data=f,
                                            timeout=request_timeout)
        # Report the real HTTP error instead of a KeyError on the JSON body
        upload_response.raise_for_status()
        upload_url = upload_response.json()["upload_url"]

        # Request transcription
        transcript_request = {
            "audio_url": upload_url,
            "punctuate": True,
            "format_text": True
        }
        response = requests.post(TRN_EP, json=transcript_request, headers=HDR_TR,
                                 timeout=request_timeout)
        response.raise_for_status()
        transcript_id = response.json()["id"]

        # Poll for completion, but never forever
        deadline = time.monotonic() + max_wait
        while True:
            response = requests.get(f"{TRN_EP}/{transcript_id}", headers=HDR_UP,
                                    timeout=request_timeout)
            response.raise_for_status()
            result = response.json()
            status = result["status"]

            if status == "completed":
                # Callers take len() of / iterate the result, so never hand back None
                return result.get("words") or []
            if status == "error":
                raise RuntimeError(f"AssemblyAI error: {result.get('error', 'Unknown error')}")
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"AssemblyAI transcript {transcript_id} not finished after {max_wait:.0f}s"
                )

            time.sleep(poll_interval)
    except Exception as e:
        print(f"      ❌ Transcription error: {e}")
        return []

def predict_noise_for_file(wav_filename: str) -> np.ndarray:
    """Predict noise for a file using the loaded model.

    The features are read from ``<name>_X.npy`` inside PREPROCESSED_DIR, where
    ``<name>`` is ``wav_filename`` with ".wav" replaced. When that file does
    not exist a warning is printed and an empty array is returned. Otherwise
    the array gets a leading batch axis and a trailing channel axis, is fed to
    ``model.predict`` in slices of 1000 along axis 2 (presumably the frame
    axis - TODO confirm), and the per-slice outputs are concatenated on axis 0.
    """
    feature_path = os.path.join(PREPROCESSED_DIR, wav_filename.replace(".wav", "_X.npy"))
    if not os.path.exists(feature_path):
        print(f"      ⚠️ No preprocessed file found: {feature_path}")
        return np.array([])

    # Add batch and channel dims
    X = np.load(feature_path)[None, ..., None]

    # Predict slice by slice to keep memory bounded; slicing past the end of
    # the axis simply yields a shorter final slice.
    chunk_size = 1000
    per_chunk = [
        model.predict(X[:, :, lo:lo + chunk_size, :], verbose=0)[0]
        for lo in range(0, X.shape[2], chunk_size)
    ]
    return np.concatenate(per_chunk, axis=0)

def find_noise_runs(binary_vector: np.ndarray, min_frames=None, max_frames=None) -> list:
    """Find contiguous runs of truthy values whose length is within bounds.

    Args:
        binary_vector: 1-D sequence of 0/1 (or boolean) flags, one per frame.
        min_frames: Shortest run length to keep (inclusive). Defaults to the
            module-level MIN_FRAMES when omitted.
        max_frames: Longest run length to keep (inclusive). Defaults to the
            module-level MAX_FRAMES when omitted.

    Returns:
        A list of ``(start_idx, end_idx)`` tuples of Python ints, ``end_idx``
        exclusive, in order of appearance. Runs that touch either end of the
        vector are included, also when they are a single frame long.
    """
    shortest = MIN_FRAMES if min_frames is None else min_frames
    longest = MAX_FRAMES if max_frames is None else max_frames

    flags = np.asarray(binary_vector).astype(bool)
    if flags.size == 0:
        return []

    # Pad with False on both sides so every run has a rising and a falling
    # edge; edge positions then alternate start, end, start, end, ...
    padded = np.concatenate(([False], flags, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    starts, ends = edges[0::2], edges[1::2]

    # Filter by duration
    return [
        (int(start), int(end))
        for start, end in zip(starts, ends)
        if shortest <= end - start <= longest
    ]

# ───────── Process First File Only ────────────────────────────────────
audio_files = sorted(NOISY_AUDIO_DIR.glob("*.wav"))[:1]  # first file only!

if not audio_files:
    raise RuntimeError("No audio files found in test_dataset/data")

print(f"📁 Processing file: {audio_files[0].name}")

# Safety check for model: the loading cell assigns `model = None` when loading
# fails, so checking that the name exists is not enough.
if globals().get('model') is None:
    raise RuntimeError("⚠️ Model not loaded - run model loading cell first")

all_results = []
PRE_CONTEXT_SEC = 20   # 20 seconds before noise
POST_CONTEXT_SEC = 30  # 30 seconds after noise (increased!)
MIN_AFTER_AUDIO = 10   # Minimum audio needed after noise for good context

def _transcribe_context_clip(clip, label, clip_path, min_ms=None):
    """Export one clip to ``clip_path``, transcribe it and describe the result.

    Args:
        clip: Audio clip to transcribe, or None when there is no audio.
        label: Text shown in the progress message (e.g. "BEFORE").
        clip_path: Destination of the exported WAV file.
        min_ms: When given, a clip that is None or not longer than this many
            milliseconds is skipped and an empty result is returned.

    Returns:
        A dict with 'words', 'text' and 'duration' (seconds), plus 'clip_path'
        when the clip was actually exported and transcribed.
    """
    if min_ms is not None and (clip is None or len(clip) <= min_ms):
        return {'words': [], 'text': '', 'duration': 0}

    clip.export(clip_path, format="wav")
    print(f"      🎙️ Transcribing {label} ({len(clip)/1000:.1f}s)...")
    words = transcribe_with_assemblyai(str(clip_path))
    return {
        'clip_path': str(clip_path),
        'duration': len(clip)/1000,
        'words': words,
        'text': " ".join([w.get('text', '') for w in words])
    }


def _preview_text(text, limit=50):
    """Return at most ``limit`` characters of ``text``, adding '...' when cut."""
    return f"{text[:limit]}{'...' if len(text) > limit else ''}"


for file_idx, audio_file in enumerate(audio_files):
    print(f"\n📁 Processing file: {audio_file.name}")

    try:
        # Load noisy audio (WITH noise)
        noisy_audio = AudioSegment.from_file(audio_file)
        total_duration_sec = len(noisy_audio) / 1000.0
        print(f"   Duration: {total_duration_sec:.1f}s")

        # Get noise predictions from model
        predictions = predict_noise_for_file(audio_file.name)
        if len(predictions) == 0:
            print(f"   ⚠️ No predictions available, skipping file")
            continue

        print(f"   🔍 Analyzing {len(predictions)} frames for noise...")

        # Debug: Check prediction stats
        for class_idx, class_name in enumerate(NOISE_CLASSES):
            class_probs = predictions[:, class_idx]
            max_prob = np.max(class_probs)
            mean_prob = np.mean(class_probs)
            above_05 = np.sum(class_probs > 0.5)
            above_08 = np.sum(class_probs > 0.8)
            print(f"      📊 {class_name}: max={max_prob:.3f}, mean={mean_prob:.3f}, >0.5={above_05}, >0.8={above_08}")

        # Detect noise segments for each class
        detected_segments = []
        for noise_class in NOISE_CLASSES:
            class_idx = NOISE_CLASS_IDX[noise_class]

            # Binary detection with same thresholds as fast inference
            binary_pred = (predictions[:, class_idx] >= THRESHOLDS[class_idx]).astype(int)

            for start_frame, end_frame in find_noise_runs(binary_pred):
                start_sec = start_frame * FRAME_TIME
                end_sec = end_frame * FRAME_TIME
                # Derive the duration from the frame count: subtracting the two
                # rounded second values can land just below MIN_SEGMENT_SEC for
                # a run of exactly MIN_FRAMES frames and drop it by mistake.
                segment_duration = (end_frame - start_frame) * FRAME_TIME

                print(f"        🔍 Found {noise_class} segment: {start_sec:.1f}s-{end_sec:.1f}s (duration: {segment_duration:.1f}s)")

                # Only process segments ≥ MIN_SEGMENT_SEC with enough audio after them
                if segment_duration < MIN_SEGMENT_SEC:
                    print(f"        ❌ Skipped: duration {segment_duration:.1f}s < {MIN_SEGMENT_SEC:.1f}s")
                    continue

                # Check if there's enough audio after the noise for good context
                remaining_after = total_duration_sec - end_sec
                if remaining_after < MIN_AFTER_AUDIO:
                    print(f"        ❌ Skipped: only {remaining_after:.1f}s left after noise (need ≥{MIN_AFTER_AUDIO}s)")
                    continue

                detected_segments.append({
                    "filename": audio_file.name,
                    "start_time": start_sec,
                    "end_time": end_sec,
                    "duration": segment_duration,
                    "noise_type": noise_class,
                    # Mean predicted probability of this class over the run
                    "confidence": float(np.mean(predictions[start_frame:end_frame, class_idx]))
                })

        print(f"   ✅ Found {len(detected_segments)} noise segments ≥{MIN_SEGMENT_SEC:.1f}s")

        # Process each detected segment
        for seg_idx, segment in enumerate(detected_segments):
            print(f"   🔄 Segment {seg_idx + 1}/{len(detected_segments)}: "
                  f"{segment['noise_type']} {segment['start_time']:.1f}s-{segment['end_time']:.1f}s "
                  f"(duration: {segment['duration']:.1f}s)")

            try:
                # Context window, clamped to the file boundaries
                context_start = max(0, segment['start_time'] - PRE_CONTEXT_SEC)
                context_end = min(total_duration_sec, segment['end_time'] + POST_CONTEXT_SEC)

                print(f"      📍 Context range: {context_start:.1f}s - {context_end:.1f}s")

                # Millisecond boundaries of the three clips cut from the NOISY audio
                context_start_ms = int(context_start * 1000)
                noise_start_ms = int(segment['start_time'] * 1000)
                noise_end_ms = int(segment['end_time'] * 1000)
                context_end_ms = int(context_end * 1000)

                clip_before = noisy_audio[context_start_ms:noise_start_ms] if noise_start_ms > context_start_ms else None
                clip_during = noisy_audio[noise_start_ms:noise_end_ms]
                clip_after = noisy_audio[noise_end_ms:context_end_ms] if context_end_ms > noise_end_ms else None

                # Save and transcribe each part. The noisy part is always
                # transcribed; context parts need more than 1 second of audio.
                results_parts = {
                    'before': _transcribe_context_clip(
                        clip_before, "BEFORE", CLIPS_DIR / f"before_{file_idx}_{seg_idx}.wav", min_ms=1000),
                    'during': _transcribe_context_clip(
                        clip_during, "DURING noise", CLIPS_DIR / f"during_{file_idx}_{seg_idx}.wav"),
                    'after': _transcribe_context_clip(
                        clip_after, "AFTER", CLIPS_DIR / f"after_{file_idx}_{seg_idx}.wav", min_ms=1000),
                }

                # Store comprehensive result
                result = {
                    "file_index": file_idx,
                    "segment_index": seg_idx,
                    "filename": audio_file.name,
                    "noise_type": segment['noise_type'],
                    "confidence": segment['confidence'],
                    "start_time": round(segment['start_time'], 3),
                    "end_time": round(segment['end_time'], 3),
                    "duration": round(segment['duration'], 3),

                    # Context information
                    "context_start_time": round(context_start, 3),
                    "context_end_time": round(context_end, 3),
                }
                # Text, word count and duration for each of the three parts
                for part in ('before', 'during', 'after'):
                    result[f"{part}_text"] = results_parts[part]['text']
                    result[f"{part}_word_count"] = len(results_parts[part]['words'])
                    result[f"{part}_duration"] = round(results_parts[part]['duration'], 3)

                # Summary
                result["total_context_words"] = result["before_word_count"] + result["after_word_count"]
                result["noise_blocked_transcription"] = result["during_word_count"] == 0
                result["full_results"] = results_parts

                all_results.append(result)

                print(f"      ✅ Results:")
                for part, heading in (('before', "BEFORE:"), ('during', "DURING:"), ('after', "AFTER: ")):
                    word_count = result[f"{part}_word_count"]
                    print(f"         {heading} {word_count} words - '{_preview_text(results_parts[part]['text'])}'")

            except Exception as e:
                print(f"      ❌ Error processing segment: {e}")

    except Exception as e:
        print(f"   ❌ Error processing file {audio_file.name}: {e}")

# ───────── Save Results ─────────────────────────────────────────────
print(f"\n💾 Saving results...")

# The JSON file is always written, even when the list is empty.
json_path = CONTEXT_OUTPUT_DIR / "context_transcription_results.json"
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2, ensure_ascii=False)

# The CSV file is only written when there is at least one result row.
csv_path = CONTEXT_OUTPUT_DIR / "context_transcription_results.csv"
if all_results:
    results_df = pd.DataFrame(all_results)
    results_df.to_csv(csv_path, index=False, encoding="utf-8")

# ───────── Summary ─────────────────────────────────────────────────
n_results = len(all_results)

print(f"\n🎉 Context Transcription Complete!")
print(f"=" * 60)
print(f"📊 Total segments ≥{MIN_SEGMENT_SEC:.1f}s processed: {n_results}")
print(f"📁 Output directory: {CONTEXT_OUTPUT_DIR.resolve()}")
print(f"📄 JSON file: {json_path.resolve()}")
print(f"📄 CSV file: {csv_path.resolve()}")
print(f"🎵 Audio clips: {CLIPS_DIR.resolve()}")

if all_results:
    # Word totals and blocked-segment count, gathered in a single pass
    total_before = total_during = total_after = total_context = blocked_segments = 0
    for row in all_results:
        total_before += row["before_word_count"]
        total_during += row["during_word_count"]
        total_after += row["after_word_count"]
        total_context += row["total_context_words"]
        if row["noise_blocked_transcription"]:
            blocked_segments += 1

    print(f"\n📊 Transcription Statistics (≥{MIN_SEGMENT_SEC:.1f}s segments only):")
    print(f"   Words BEFORE noise: {total_before:,}")
    print(f"   Words DURING noise: {total_during:,}")
    print(f"   Words AFTER noise: {total_after:,}")
    print(f"   Total context words: {total_context:,}")
    print(f"   Segments blocked by noise: {blocked_segments}/{n_results}")

    # n_results is non-zero inside this branch, so the averages are safe
    print(f"   Average words before per segment: {total_before / n_results:.1f}")
    print(f"   Average words after per segment: {total_after / n_results:.1f}")
    print(f"   Noise blocking rate: {blocked_segments / n_results * 100:.1f}%")

    # Duration statistics
    durations = [row["duration"] for row in all_results]
    avg_duration = sum(durations) / len(durations)
    min_duration, max_duration = min(durations), max(durations)

    print(f"\n📊 Duration Statistics:")
    print(f"   Average duration: {avg_duration:.1f}s")
    print(f"   Minimum duration: {min_duration:.1f}s")
    print(f"   Maximum duration: {max_duration:.1f}s")

    # Noise type breakdown: segment count and summed duration per type
    noise_stats = {}
    for row in all_results:
        bucket = noise_stats.setdefault(row["noise_type"], {"count": 0, "total_duration": 0})
        bucket["count"] += 1
        bucket["total_duration"] += row["duration"]

    print(f"\n📊 Noise type breakdown (≥{MIN_SEGMENT_SEC:.1f}s only):")
    for noise_type, stats in noise_stats.items():
        avg_dur = stats["total_duration"] / stats["count"]
        print(f"   {noise_type}: {stats['count']} segments (avg: {avg_dur:.1f}s)")

    # Show the first three results in detail
    print(f"\n📄 Detailed results preview:")
    for seg_no, result in enumerate(all_results[:3], start=1):
        print(f"\n--- Segment #{seg_no} ---")
        print(f"File: {result['filename']}")
        print(f"Noise: {result['noise_type']} ({result['start_time']:.1f}s-{result['end_time']:.1f}s, {result['duration']:.1f}s)")
        print(f"BEFORE ({result['before_word_count']} words): '{result['before_text'][:80]}{'...' if len(result['before_text']) > 80 else ''}'")
        print(f"DURING ({result['during_word_count']} words): '{result['during_text'][:80]}{'...' if len(result['during_text']) > 80 else ''}'")
        print(f"AFTER  ({result['after_word_count']} words): '{result['after_text'][:80]}{'...' if len(result['after_text']) > 80 else ''}'")
        print(f"Blocked: {result['noise_blocked_transcription']}")

print(f"\n✅ Ready for LLM text prediction with separated context (≥{MIN_SEGMENT_SEC:.1f}s segments only)!")

🎯 Detecting noise segments and transcribing with context...
📋 Processing only FIRST file for testing
📁 Processing file: AimeeMullins_2009P_with_noises.wav

📁 Processing file: AimeeMullins_2009P_with_noises.wav
   Duration: 1318.2s
   🔍 Analyzing 131822 frames for noise...
      📊 signal_loss: max=0.985, mean=0.244, >0.5=2965, >0.8=1768
      📊 volume_drop: max=0.957, mean=0.305, >0.5=8585, >0.8=709
      📊 compression_artifact: max=0.992, mean=0.550, >0.5=63963, >0.8=44848
        🔍 Found signal_loss segment: 12.9s-16.0s (duration: 3.1s)
        🔍 Found signal_loss segment: 826.6s-829.8s (duration: 3.1s)
        🔍 Found signal_loss segment: 1016.2s-1019.8s (duration: 3.6s)
        🔍 Found compression_artifact segment: 590.6s-594.1s (duration: 3.4s)
   ✅ Found 4 noise segments ≥3.0s
   🔄 Segment 1/4: signal_loss 12.9s-16.0s (duration: 3.1s)
      📍 Context range: 0.0s - 46.0s
      🎙️ Transcribing BEFORE (12.9s)...
   🔍 Analyzing 131822 frames for noise...
      📊 signal_loss: max=0.985

In [2]:
# ═══════════════════════════════════════════════════════════════════
# 🤖 GEMINI TEXT PREDICTION
# ═══════════════════════════════════════════════════════════════════

import google.generativeai as genai
import os, json
import pandas as pd

print("🤖 Starting Gemini text prediction...")

# ───────── Load Data ────────────────────────────────────────────────
# Load context clips (written by the context-transcription cell)
with open("context_transcription/context_transcription_results.json", 'r', encoding='utf-8') as f:
    context_clips = json.load(f)

# Load real missing text (written by the original-file transcription cell)
with open("transcription_dataset/missing_text_transcriptions.json", 'r', encoding='utf-8') as f:
    real_missing_data = json.load(f)

# Create lookup for real missing text, keyed by "<noisy filename>_<start>s"
# with the start time formatted to one decimal. When two entries produce the
# same key, the later one wins.
real_missing_lookup = {
    f"{entry['noisy_filename']}_{float(entry['start_time']):.1f}s": entry['missing_text']
    for entry in real_missing_data
}

print(f"📋 Loaded {len(context_clips)} clips and {len(real_missing_data)} real missing texts")

# ───────── Gemini Configuration ────────────────────────────────────
# The API key is read from the environment so that it is never stored in the
# notebook. Export GEMINI_API_KEY before starting Jupyter. A key that was
# previously committed in this cell should be treated as leaked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    print("⚠️ GEMINI_API_KEY is not set - Gemini requests will fail")

genai.configure(api_key=GEMINI_API_KEY)
gem = genai.GenerativeModel("gemini-1.5-flash-latest")

def predict_missing_text(before: str, during: str, after: str, duration: float) -> str:
    """Predict the text hidden by a corrupted audio gap using Gemini.

    Args:
        before: Transcription preceding the gap. ``None`` is treated as empty.
        during: Transcription of the corrupted span itself. ``None`` is treated as empty.
        after: Transcription following the gap. ``None`` is treated as empty.
        duration: Length of the gap in seconds; used to size the requested answer.

    Returns:
        The stripped model response, or a string of the form ``"[ERROR: <message>]"``
        if the request (or reading the response) fails. This function never raises
        for API failures; callers detect them via the ``"[ERROR"`` prefix.
    """
    # A JSON null in the clip data arrives here as None; normalise it so the word
    # counts below do not blow up before the request is even attempted.
    before = before or ""
    during = during or ""
    after = after or ""

    # Rough size hint: 2.5 words per second of gap, and never ask for fewer than one word.
    words_expected = max(1, int(duration * 2.5))

    prompt = f"""You need to predict missing text from audio transcription.

BEFORE the gap ({len(before.split())} words):
{before}

DURING the gap: [{duration:.1f} seconds of corrupted audio]
{during}

AFTER the gap ({len(after.split())} words):
{after}

Predict approximately {words_expected} words that naturally connect BEFORE and AFTER.
Return only the predicted text, no explanations."""

    try:
        response = gem.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        # Deliberate best-effort: one failed clip must not abort the whole batch, so
        # the failure is encoded in the returned string instead of being raised.
        return f"[ERROR: {str(e)}]"

# ───────── Process All Clips ───────────────────────────────────────
# For every clip: gather its context, look up the reference text, ask Gemini for a
# prediction, and collect one flat result row.
results = []

for i, clip in enumerate(context_clips):
    # Coerce the timestamps once so that formatting and arithmetic agree even when
    # the JSON stores them as strings (":.1f" on a str raises ValueError).
    clip_start = float(clip['start_time'])
    clip_end = float(clip['end_time'])
    duration = clip_end - clip_start

    print(f"\n🔄 Clip {i+1}/{len(context_clips)}: {clip['noise_type']} at {clip_start:.1f}s")

    # Get context; `or ''` also covers keys that are present but null.
    before_text = clip.get('before_text') or ''
    during_text = clip.get('during_text') or ''
    after_text = clip.get('after_text') or ''

    # Get real missing text; the key format must match the one used to build
    # real_missing_lookup ("<filename>_<start>s", start rounded to one decimal).
    key = f"{clip['filename']}_{clip_start:.1f}s"
    real_text = real_missing_lookup.get(key, '[not found]')

    # Predict with Gemini
    predicted_text = predict_missing_text(before_text, during_text, after_text, duration)

    print(f"   ✅ Real: '{real_text}'")
    print(f"   🤖 Predicted: '{predicted_text}'")

    # Store result
    results.append({
        "clip": i+1,
        "filename": clip['filename'],
        "noise_type": clip['noise_type'],
        "timing": f"{clip_start:.1f}s-{clip_end:.1f}s",
        "duration": round(duration, 1),
        "before_words": len(before_text.split()),
        "after_words": len(after_text.split()),
        "real_missing": real_text,
        "gemini_prediction": predicted_text
    })

# ───────── Save Results ────────────────────────────────────────────
# Single source of truth for the output location, so the path reported below is
# the path actually written.
RESULTS_CSV = "context_transcription/gemini_final_results.csv"

df = pd.DataFrame(results)
df.to_csv(RESULTS_CSV, index=False, encoding='utf-8')

print(f"\n🎉 Done! Processed {len(results)} clips")
print(f"📄 Results saved to: {RESULTS_CSV}")

# Show summary
# Count the clips that had a reference text, and the predictions that did not come
# back as an "[ERROR ..." sentinel string.
found_real = len([row for row in results if row['real_missing'] != '[not found]'])
successful_pred = len([row for row in results if not row['gemini_prediction'].startswith('[ERROR')])

print(f"📊 Real text found: {found_real}/{len(results)}")
print(f"📊 Successful predictions: {successful_pred}/{len(results)}")
print("\n📋 Results:")
for row in results:
    # Only the first 30 characters of each text are shown to keep the listing compact.
    real_preview = row['real_missing'][:30]
    predicted_preview = row['gemini_prediction'][:30]
    print(f"   {row['clip']}: {row['noise_type']} | Real: '{real_preview}...' | Predicted: '{predicted_preview}...'")

  from .autonotebook import tqdm as notebook_tqdm


🤖 Starting Gemini text prediction...
📋 Loaded 4 clips and 141 real missing texts

🔄 Clip 1/4: signal_loss at 12.9s
   ✅ Real: '[not found]'
   🤖 Predicted: 'So, I did.  And'

🔄 Clip 2/4: signal_loss at 826.6s
   ✅ Real: '[not found]'
   🤖 Predicted: 'So, I did.  And'

🔄 Clip 2/4: signal_loss at 826.6s
   ✅ Real: 'To be even a little bit more dangerous, we can release.'
   🤖 Predicted: 'then we can unleash'

🔄 Clip 3/4: signal_loss at 1016.2s
   ✅ Real: 'To be even a little bit more dangerous, we can release.'
   🤖 Predicted: 'then we can unleash'

🔄 Clip 3/4: signal_loss at 1016.2s
   ✅ Real: 'In my experience, unless rep.'
   🤖 Predicted: '"he had been repeatedly"'

🔄 Clip 4/4: compression_artifact at 590.6s
   ✅ Real: 'In my experience, unless rep.'
   🤖 Predicted: '"he had been repeatedly"'

🔄 Clip 4/4: compression_artifact at 590.6s
   ✅ Real: 'And consistent disability I've had to confront is.'
   🤖 Predicted: 'difference is between medical fact'

🎉 Done! Processed 4 clips
📄 Resul