<a href="https://colab.research.google.com/github/yigitaytt/Deep-Learning-Optimization-Methods-Python/blob/main/Optimization_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install "numpy<2.0"

In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import copy
import time
from sklearn.manifold import TSNE
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer

# 1. Setup and Data Loading
Loads the processed dataset and prepares PyTorch tensors.

In [None]:
# Select the compute device: CUDA when available, CPU otherwise
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Device set to: {device}")

print("1. Loading Data...")
try:
    # File names are kept verbatim so they match the uploaded .npy files
    X_train, y_train, X_test, y_test = (
        np.load(npy_name)
        for npy_name in ("egitim_X.npy", "egitim_y.npy", "test_X.npy", "test_y.npy")
    )
except FileNotFoundError:
    print("ERROR: Data files not found. Please upload 'egitim_X.npy', etc.")
    # SystemExit is raised (instead of calling exit) for Colab compatibility
    raise SystemExit("Stopping execution due to missing files.")

# Feature matrices become float tensors on the selected device
X_train_tensor, X_test_tensor = (
    torch.FloatTensor(features).to(device) for features in (X_train, X_test)
)
# Targets are reshaped into a single column so they line up with the model output
y_train_tensor, y_test_tensor = (
    torch.FloatTensor(labels).view(-1, 1).to(device) for labels in (y_train, y_test)
)

print(f"Training Set Shape: {X_train_tensor.shape}")
print(f"Test Set Shape: {X_test_tensor.shape}")

# 2. Regression Model Architecture
Defines a single-layer model: a bias-free linear layer followed by a Tanh activation.

In [None]:
class RegressionModel(nn.Module):
    """Single linear unit without bias whose output is passed through tanh.

    Args:
        input_dim: Number of input features consumed by the linear layer.
    """

    def __init__(self, input_dim):
        super().__init__()
        # bias=False: no intercept is learned here (the original note suggests
        # the data may already carry a bias column -- confirm against the data).
        self.linear = nn.Linear(in_features=input_dim, out_features=1, bias=False)
        self.activation = nn.Tanh()

    def forward(self, x):
        """Return ``tanh(linear(x))``; the last dimension of the result is 1."""
        projected = self.linear(x)
        return self.activation(projected)

# 3. Optimization Experiments (GD, SGD, Adam)
Runs training loops with different optimizers across multiple random seeds.

In [None]:
def run_experiments():
    """Train the regression model with GD, SGD and Adam over five fixed seeds.

    For every seed one set of initial weights is drawn and copied into the
    model of each optimizer, so all three start from the same point.

    Reads the module-level ``X_train``, ``X_train_tensor``, ``y_train_tensor``,
    ``X_test_tensor``, ``y_test_tensor`` and ``device``.

    Returns:
        dict: ``results[alg][key]`` is a list with one entry per seed, each
        entry being a per-epoch list. Keys:

        * ``'train_loss'`` / ``'test_loss'``: MSE over the full training /
          test set after each epoch.
        * ``'train_acc'`` / ``'test_acc'``: fraction of samples whose
          ``sign(output)`` equals the target.
        * ``'weights'``: flattened weight vectors; one more entry than the
          other keys because the initial weights are stored first.
        * ``'time_log'``: cumulative seconds spent in the training loop
          (evaluation time is excluded).
    """
    input_dim = X_train.shape[1]
    n_train = len(X_train)
    seeds = [1, 42, 100, 555, 999]  # 5 fixed seeds for reproducibility
    algorithms = ['GD', 'SGD', 'Adam']
    epochs = 100
    learning_rate = 0.0001

    # Optimizer class and batch size per algorithm:
    # GD and Adam use the full batch, SGD uses one sample per step.
    setups = {
        'GD': (torch.optim.SGD, n_train),
        'SGD': (torch.optim.SGD, 1),
        'Adam': (torch.optim.Adam, n_train),
    }

    metric_keys = ('train_loss', 'test_loss', 'train_acc', 'test_acc',
                   'weights', 'time_log')
    results = {alg: {key: [] for key in metric_keys} for alg in algorithms}

    criterion = nn.MSELoss()

    for seed_idx, seed in enumerate(seeds):
        print(f"\n>>> Processing Seed {seed} ({seed_idx+1}/{len(seeds)})...")

        # Set seed for reproducibility
        torch.manual_seed(seed)

        # A throw-away model provides the starting weights shared by all algorithms
        temp_model = RegressionModel(input_dim)
        initial_weights = temp_model.linear.weight.data.clone()

        for alg in algorithms:
            # Fresh model, then overwrite its random init with the shared weights
            model = RegressionModel(input_dim).to(device)
            with torch.no_grad():
                model.linear.weight.copy_(initial_weights)

            optimizer_cls, batch_size = setups[alg]
            optimizer = optimizer_cls(model.parameters(), lr=learning_rate)

            # Metric tracking lists for current seed/algo
            loss_tr, loss_te, acc_tr, acc_te, w_hist, t_log = [], [], [], [], [], []

            # Record initial weights
            w_hist.append(model.linear.weight.detach().cpu().numpy().flatten().copy())

            train_seconds = 0.0

            for _ in range(epochs):
                model.train()
                epoch_start = time.perf_counter()
                perm = torch.randperm(n_train)  # Shuffle data

                # Mini-batch loop
                for i in range(0, n_train, batch_size):
                    indices = perm[i:i+batch_size]
                    batch_x, batch_y = X_train_tensor[indices], y_train_tensor[indices]

                    optimizer.zero_grad()
                    loss = criterion(model(batch_x), batch_y)
                    loss.backward()
                    optimizer.step()

                # Only the training portion counts towards the time axis;
                # the evaluation below is bookkeeping, not optimizer cost.
                train_seconds += time.perf_counter() - epoch_start

                # Evaluation phase
                model.eval()
                with torch.no_grad():
                    out_tr = model(X_train_tensor)
                    # Loss over the whole training set, so the value is
                    # comparable across optimizers and with the test loss
                    # (the last mini-batch alone is a single sample for SGD).
                    l_tr = criterion(out_tr, y_train_tensor).item()
                    # Accuracy: predicted sign must equal the target
                    a_tr = (torch.sign(out_tr) == y_train_tensor).float().mean().item()

                    out_te = model(X_test_tensor)
                    l_te = criterion(out_te, y_test_tensor).item()
                    a_te = (torch.sign(out_te) == y_test_tensor).float().mean().item()

                # Store metrics
                loss_tr.append(l_tr)
                loss_te.append(l_te)
                acc_tr.append(a_tr)
                acc_te.append(a_te)
                t_log.append(train_seconds)
                w_hist.append(model.linear.weight.detach().cpu().numpy().flatten().copy())

            # Append run results to main dictionary
            results[alg]['train_loss'].append(loss_tr)
            results[alg]['test_loss'].append(loss_te)
            results[alg]['train_acc'].append(acc_tr)
            results[alg]['test_acc'].append(acc_te)
            results[alg]['weights'].append(w_hist)
            results[alg]['time_log'].append(t_log)

            print(f"   -> {alg} completed. (Final Test Acc: {acc_te[-1]:.2f})")

    return results

# Launch the GD / SGD / Adam sweep
print("Starting Experiments...")
results = run_experiments()

# Persist the nested results dict so Part B (t-SNE) can reload it.
# np.save pickles non-array objects, which is why Part B loads with allow_pickle=True.
np.save(file="model_results_part_A.npy", arr=results)
print("Results saved to 'model_results_part_A.npy'")

# 4. Visualization of Metrics
Plots Training/Test Loss and Accuracy against Epochs and Time.

In [None]:
def plot_metrics(metric_key, title, ylabel, x_axis='epoch', time_xlim=2.0):
    """Plot the seed-averaged curve of one metric for GD, SGD and Adam.

    Reads the module-level ``results`` dict. For each algorithm the per-seed
    series are truncated to a common length, averaged, and drawn with a
    shaded band of half a standard deviation on either side of the mean.
    The figure is saved as ``PartA_<metric_key>_<x_axis>.png`` and shown.

    Args:
        metric_key: Key inside ``results[alg]`` to plot (e.g. ``'train_loss'``).
        title: Figure title.
        ylabel: Label of the y axis.
        x_axis: ``'time'`` uses ``results[alg]['time_log']`` as x values;
            any other value uses the epoch index.
        time_xlim: Upper x limit (seconds) applied to time plots. Pass
            ``None`` to let matplotlib choose the range automatically.
    """
    plt.figure(figsize=(10, 6))
    colors = {'GD': 'blue', 'SGD': 'orange', 'Adam': 'green'}

    for alg in ['GD', 'SGD', 'Adam']:
        data = results[alg][metric_key]

        # Determine X-axis data
        if x_axis == 'time':
            x_data_list = results[alg]['time_log']
        else:
            x_data_list = [list(range(len(d))) for d in data]

        # Truncate to the shortest series (y and x) so every seed stacks
        # into a rectangular array.
        min_len = min(
            min(len(d) for d in data),
            min(len(x) for x in x_data_list),
        )
        data_np = np.array([d[:min_len] for d in data])
        x_data_np = np.array([x[:min_len] for x in x_data_list])

        # Mean and standard deviation across seeds
        mean_y = np.mean(data_np, axis=0)
        mean_x = np.mean(x_data_np, axis=0)
        std_y = np.std(data_np, axis=0)

        # Mean curve plus a band of +/- half a standard deviation
        plt.plot(mean_x, mean_y, label=alg, color=colors[alg], linewidth=2)
        plt.fill_between(mean_x, mean_y - (std_y/2), mean_y + (std_y/2), color=colors[alg], alpha=0.1)

    # Clamp the time axis only when a limit was requested
    if x_axis == 'time' and time_xlim is not None:
        plt.xlim(0, time_xlim)

    plt.title(title, fontsize=14)
    plt.xlabel("Time (Seconds)" if x_axis == 'time' else "Epochs", fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    plt.legend(fontsize=10)
    plt.grid(True, alpha=0.3)

    # Save plot
    filename = f"PartA_{metric_key}_{x_axis}.png"
    plt.savefig(filename)
    print(f"Plot saved: {filename}")
    plt.show()

print("\nGenerating Plots...")

# (results key, title prefix, y-axis label) for each of the four metrics
metric_specs = [
    ('train_loss', 'Training Loss', 'MSE Loss'),
    ('test_loss', 'Test Loss', 'MSE Loss'),
    ('train_acc', 'Training Accuracy', 'Accuracy'),
    ('test_acc', 'Test Accuracy', 'Accuracy'),
]

# Group 1 plots every metric against epochs, Group 2 against time
for axis_kind, axis_title in (('epoch', 'Epochs'), ('time', 'Time')):
    for key, title_prefix, y_label in metric_specs:
        plot_metrics(key, f"{title_prefix} vs {axis_title}", y_label, x_axis=axis_kind)

# 5. Data Preparation & t-SNE Dimensionality Reduction
Loads the results from Part A, flattens weight histories, and computes 2D embeddings using t-SNE.

In [None]:
# 1. Load Results
print("1. Loading Part A Results...")
try:
    # The file holds a pickled dict wrapped in a 0-d object array; .item() unwraps it
    results = np.load("model_results_part_A.npy", allow_pickle=True).item()
except FileNotFoundError:
    raise SystemExit("ERROR: 'model_results_part_A.npy' not found. Please run Part A first.")
else:
    print("-> Data loaded successfully.")

algorithms = ['GD', 'SGD', 'Adam']
colors = {'GD': 'blue', 'SGD': 'orange', 'Adam': 'green'}

all_weights = []     # every weight vector of every run, stacked in order
trajectory_map = []  # one record per run: which slice of all_weights it owns
idx_counter = 0      # running offset into all_weights

# 2. Flatten Weights for t-SNE
print("2. Processing Weight Histories...")
for alg in algorithms:
    for seed_i, w_history in enumerate(results[alg]['weights']):
        w_history = np.array(w_history)

        # Each row of the history is one optimisation step
        all_weights.extend(w_history)

        # Remember the [start, end) slice so the run can be recovered later
        run_end = idx_counter + len(w_history)
        trajectory_map.append(
            dict(alg=alg, seed=seed_i, start=idx_counter, end=run_end)
        )
        idx_counter = run_end

# 3. Compute t-SNE
print("3. Computing t-SNE (This may take a moment)...")
tsne = TSNE(
    n_components=2,
    perplexity=30,
    max_iter=1000,
    init='pca',
    learning_rate='auto',
    random_state=42,
)
weights_embedded = tsne.fit_transform(np.array(all_weights))
print("-> t-SNE Computation Complete.")

# 6. Trajectory Visualization
Visualizes the path taken by each optimization algorithm from 5 different starting points.

In [None]:
plt.figure(figsize=(15, 10))

# Algorithms that already own a legend entry (one entry per algorithm)
labelled_algs = set()

print("4. Plotting Trajectories...")

# Draw every run using the slice recorded in trajectory_map
for info in trajectory_map:
    alg, seed_id = info['alg'], info['seed']
    run_color = colors[alg]

    # 2D coordinates of this specific run
    path = weights_embedded[info['start']:info['end']]
    xs, ys = path[:, 0], path[:, 1]

    # Thin line for the trajectory itself
    plt.plot(xs, ys, color=run_color, linewidth=0.8, alpha=0.4)

    # Dots for the individual steps; only the first run of an algorithm is labelled
    lbl = "_nolegend_" if alg in labelled_algs else alg
    plt.scatter(xs, ys, color=run_color, s=15, alpha=0.5, label=lbl)
    labelled_algs.add(alg)

    # Start points (stars) are drawn and labelled for GD only to avoid clutter
    if alg == 'GD':
        plt.plot(xs[0], ys[0], marker='*', color='black', markersize=18, zorder=10)
        plt.text(xs[0], ys[0], f"  S{seed_id+1}", fontsize=12, fontweight='bold', color='black')

    # End point (X) of every run
    plt.plot(xs[-1], ys[-1], marker='X', color='black', markersize=10, zorder=5)

# Styling
plt.title("2D t-SNE Trajectories of Optimization Algorithms", fontsize=16)
plt.xlabel("Dimension 1", fontsize=12)
plt.ylabel("Dimension 2", fontsize=12)
plt.legend(fontsize=12, loc='best', title="Algorithms")
plt.grid(True, alpha=0.3)

# Caption explaining the markers
caption = "* Black Stars (S1-S5): 5 Different Initializations | Lines: Optimization Paths | Black X: Final Weights"
plt.figtext(0.5, 0.01, caption, ha="center", fontsize=11, style='italic')

filename = "PartB_Combined_Trajectory.png"
plt.savefig(filename, dpi=300)
print(f"-> Plot saved: {filename}")
plt.show()

# 7. Bonus: Algorithm Comparison on 2-Layer MLP
Evaluates the performance of GD, SGD, and Adam optimizers on a non-linear architecture (Multi-Layer Perceptron) with ReLU activation.

In [None]:
# 1. Load Data
try:
    X_train, y_train, X_test, y_test = (
        np.load(npy_name)
        for npy_name in ("egitim_X.npy", "egitim_y.npy", "test_X.npy", "test_y.npy")
    )
except FileNotFoundError:
    raise SystemExit("ERROR: Data files (.npy) not found.")

device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Features become float tensors; targets are reshaped into a single column
X_train_tensor, X_test_tensor = (
    torch.FloatTensor(features).to(device) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.FloatTensor(labels).view(-1, 1).to(device) for labels in (y_train, y_test)
)

print(f"Data Prepared. Training Shape: {X_train_tensor.shape}")

# 2. Define MLP Model (2-Layer Neural Network)
class MLPModel(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear -> Tanh.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer (64 by default).
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        self.layer1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(in_features=hidden_dim, out_features=1)
        self.activation2 = nn.Tanh()

    def forward(self, x):
        """Return the tanh-squashed scalar output for each row of ``x``."""
        hidden = self.relu(self.layer1(x))
        return self.activation2(self.layer2(hidden))

# 3. Experiment Setup
algorithms = ['Adam', 'SGD', 'GD']
mlp_results = {alg: {'acc': [], 'loss': []} for alg in algorithms}

input_dim = X_train.shape[1]
epochs = 100
n_train = len(X_train)

# (optimizer class, learning rate, batch size) per algorithm.
# Adam and GD step once per epoch on the full batch; SGD steps per sample.
optimizer_configs = {
    'Adam': (torch.optim.Adam, 0.001, n_train),
    'SGD': (torch.optim.SGD, 0.001, 1),
    'GD': (torch.optim.SGD, 0.01, n_train),
}

print("\nStarting MLP Experiments...")

for alg in algorithms:
    print(f"   >>> Running {alg}...")

    # Fresh model for each algorithm
    model = MLPModel(input_dim, hidden_dim=64).to(device)
    criterion = nn.MSELoss()

    optimizer_cls, lr, batch_size = optimizer_configs[alg]
    optimizer = optimizer_cls(model.parameters(), lr=lr)

    # Training Loop
    for epoch in range(epochs):
        model.train()
        perm = torch.randperm(n_train)

        # Mini-batch loop
        for i in range(0, n_train, batch_size):
            indices = perm[i:i+batch_size]
            batch_x, batch_y = X_train_tensor[indices], y_train_tensor[indices]

            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            # Training loss over the whole training set. The loss of the last
            # mini-batch alone would be a single-sample value for SGD and not
            # comparable with the full-batch optimizers.
            train_loss = criterion(model(X_train_tensor), y_train_tensor).item()

            test_out = model(X_test_tensor)
            # Accuracy: sign of the prediction must equal the target
            test_acc = (torch.sign(test_out) == y_test_tensor).float().mean().item()

        mlp_results[alg]['loss'].append(train_loss)
        mlp_results[alg]['acc'].append(test_acc)

    print(f"       -> {alg} Completed. Final Accuracy: {mlp_results[alg]['acc'][-1]*100:.1f}%")

# 4. Visualization
colors = {'Adam': 'green', 'SGD': 'orange', 'GD': 'blue'}
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: test accuracy per epoch
for alg in algorithms:
    ax_acc.plot(mlp_results[alg]['acc'], label=alg, color=colors[alg], linewidth=2)

ax_acc.axhline(y=0.5, color='gray', linestyle='--', label='Random Baseline (0.5)')
ax_acc.set_title("Algorithm Comparison on 2-Layer MLP (Accuracy)")
ax_acc.set_xlabel("Epochs")
ax_acc.set_ylabel("Test Accuracy")
ax_acc.set_ylim(0, 1.05)
ax_acc.legend()
ax_acc.grid(True, alpha=0.3)

# Right panel: the loss values recorded per epoch during training
for alg in algorithms:
    ax_loss.plot(mlp_results[alg]['loss'], label=alg, color=colors[alg], linewidth=2)

ax_loss.set_title("Training Loss (MSE) on MLP")
ax_loss.set_xlabel("Epochs")
ax_loss.set_ylabel("MSE Loss")
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

fig.tight_layout()
filename = "Bonus_MLP_Algorithm_Comparison.png"
fig.savefig(filename)
print(f"\n-> Plot saved: {filename}")
plt.show()

# 8. Bonus: Impact of Training Data Size (with Noise Injection)
Analyzes how the model's generalization improves as the number of training samples increases. Gaussian noise is added to the training data to simulate real-world imperfections and highlight the importance of larger datasets.

In [None]:
# 1. Load Full Dataset
print("1. Loading Vectors...")
try:
    X_train_full, y_train_full, X_test, y_test = (
        np.load(npy_name)
        for npy_name in ("egitim_X.npy", "egitim_y.npy", "test_X.npy", "test_y.npy")
    )
except FileNotFoundError:
    raise SystemExit("ERROR: .npy files not found.")

device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# The test set stays clean (no noise is added to it); targets become one column
X_test_tensor = torch.FloatTensor(X_test).to(device)
test_targets = torch.FloatTensor(y_test)
y_test_tensor = test_targets.view(-1, 1).to(device)

# 2. Define Simple Model (Same as Part A)
class SimpleModel(nn.Module):
    """Bias-free linear layer followed by tanh (one output per sample).

    Args:
        input_dim: Number of input features.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=1, bias=False)
        self.activation = nn.Tanh()

    def forward(self, x):
        """Return ``tanh(linear(x))``."""
        projected = self.linear(x)
        return self.activation(projected)

# 3. Experiment Settings
dataset_sizes = [5, 10, 20, 30, 40, 50, 60, 80, 100]  # training samples drawn per trial
avg_accuracies = []
NOISE_LEVEL = 0.3  # std of the Gaussian noise added to the training features

print(f"\n--- Analyzing Effect of Training Set Size (Noise={NOISE_LEVEL}) ---")

input_dim = X_train_full.shape[1]
n_available = len(X_train_full)

for size in dataset_sizes:
    current_accs = []

    # Ten independent trials per size; their mean is reported
    for trial in range(10):
        # Draw `size` distinct training examples
        indices = np.random.choice(n_available, size, replace=False)
        X_subset_np, y_subset_np = X_train_full[indices], y_train_full[indices]

        # Only the training features are perturbed
        noise = np.random.normal(loc=0.0, scale=NOISE_LEVEL, size=X_subset_np.shape)
        X_curr = torch.FloatTensor(X_subset_np + noise).to(device)
        y_curr = torch.FloatTensor(y_subset_np).view(-1, 1).to(device)

        # Fresh model, trained with 100 full-batch SGD steps
        model = SimpleModel(input_dim).to(device)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
        criterion = nn.MSELoss()

        model.train()
        for step in range(100):
            optimizer.zero_grad()
            loss = criterion(model(X_curr), y_curr)
            loss.backward()
            optimizer.step()

        # Accuracy on the clean test set: sign of the output must equal the target
        model.eval()
        with torch.no_grad():
            preds = torch.sign(model(X_test_tensor))
        hit_rate = (preds == y_test_tensor).float().mean()
        current_accs.append(hit_rate.item())

    # Mean accuracy over the trials for this size
    avg_acc = np.mean(current_accs)
    avg_accuracies.append(avg_acc)
    print(f"Data Size: {size} -> Average Accuracy: {avg_acc*100:.1f}%")

# 4. Visualization
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)

# Mean test accuracy against the number of training samples
curve_label = f"Test Accuracy (Noise={NOISE_LEVEL})"
ax.plot(dataset_sizes, avg_accuracies, 'o-', color='crimson', linewidth=3, label=curve_label)

# Reference line at 0.5
ax.axhline(y=0.5, color='gray', linestyle='--', label="Random Baseline (0.5)")

ax.set_title("Effect of Training Set Size on Generalization")
ax.set_xlabel("Number of Training Samples")
ax.set_ylabel("Test Accuracy")
ax.set_ylim(0.4, 1.05)
ax.grid(True, alpha=0.3)
ax.legend()

filename = "Bonus_DataSize_WithNoise.png"
fig.savefig(filename)
print(f"\n-> Plot saved: {filename}")
plt.show()

# 9. Bonus: Comprehensive Optimizer Benchmarking
Expands the comparison to include Adagrad and RMSProp alongside GD, SGD, and Adam. This experiment uses a conservative learning rate to observe the convergence stability of adaptive methods vs. standard methods.

In [None]:
# 1. Load Data
try:
    X_train, y_train, X_test, y_test = (
        np.load(npy_name)
        for npy_name in ("egitim_X.npy", "egitim_y.npy", "test_X.npy", "test_y.npy")
    )
except FileNotFoundError:
    raise SystemExit("ERROR: .npy files not found.")

device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Features become float tensors; targets are reshaped into a single column
X_train_tensor, X_test_tensor = (
    torch.FloatTensor(features).to(device) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.FloatTensor(labels).view(-1, 1).to(device) for labels in (y_train, y_test)
)

print(f"Data Ready: {X_train_tensor.shape}")

# 2. Define Model (Single Layer Linear + Tanh)
class SimpleModel(nn.Module):
    """Single-layer model: bias-free linear map followed by tanh.

    Args:
        input_dim: Number of input features.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=1, bias=False)
        self.activation = nn.Tanh()

    def forward(self, x):
        """Return ``tanh(linear(x))``."""
        projected = self.linear(x)
        return self.activation(projected)

# 3. Experiment Setup
colors = {
    'GD': 'blue',
    'SGD': 'orange',
    'Adagrad': 'purple',
    'RMSProp': 'red',
    'Adam': 'green'
}

algorithms = ['GD', 'SGD', 'Adagrad', 'RMSProp', 'Adam']
# NOTE: this rebinds the global `results`; plot_metrics reads that same
# global, so the Part A plots cannot be regenerated after this cell runs.
results = {alg: {'acc': [], 'loss': []} for alg in algorithms}

print("\n--- Starting 5-Way Algorithm Comparison ---")

input_dim = X_train.shape[1]
epochs = 100
n_train = len(X_train)
learning_rate = 0.0001  # same conservative rate for every optimizer

# (optimizer class, batch size) per algorithm.
# Only SGD is stochastic (one sample per step); the rest use the full batch.
optimizer_configs = {
    'GD': (torch.optim.SGD, n_train),
    'SGD': (torch.optim.SGD, 1),
    'Adagrad': (torch.optim.Adagrad, n_train),
    'RMSProp': (torch.optim.RMSprop, n_train),
    'Adam': (torch.optim.Adam, n_train),
}

for alg in algorithms:
    print(f"\n>>> Running {alg}...")

    # Fresh model for each algorithm
    model = SimpleModel(input_dim).to(device)
    criterion = nn.MSELoss()

    optimizer_cls, batch_size = optimizer_configs[alg]
    optimizer = optimizer_cls(model.parameters(), lr=learning_rate)

    # Training Loop
    for epoch in range(epochs):
        model.train()
        perm = torch.randperm(n_train)

        # Batch Processing
        for i in range(0, n_train, batch_size):
            indices = perm[i:i+batch_size]
            batch_x, batch_y = X_train_tensor[indices], y_train_tensor[indices]

            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            # Training loss over the whole training set. The loss of the last
            # mini-batch alone would be a single-sample value for SGD and not
            # comparable with the full-batch optimizers.
            train_loss = criterion(model(X_train_tensor), y_train_tensor).item()

            test_out = model(X_test_tensor)
            # Accuracy: sign of the prediction must equal the target
            test_acc = (torch.sign(test_out) == y_test_tensor).float().mean().item()

        results[alg]['loss'].append(train_loss)
        results[alg]['acc'].append(test_acc)

    print(f"   -> {alg} Finished. Final Accuracy: {results[alg]['acc'][-1]*100:.1f}%")

# 4. Visualization
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: test accuracy per epoch
for alg in algorithms:
    ax_acc.plot(results[alg]['acc'], label=alg, color=colors[alg], linewidth=2.5, alpha=0.8)

ax_acc.axhline(y=0.5, color='gray', linestyle='--', label='Random Baseline (0.5)')
ax_acc.set_title("Optimizer Comparison (Accuracy)")
ax_acc.set_xlabel("Epochs")
ax_acc.set_ylabel("Accuracy")
ax_acc.set_ylim(0.4, 1.05)
ax_acc.legend(loc='lower right')
ax_acc.grid(True, alpha=0.3)

# Right panel: the loss values recorded per epoch during training
for alg in algorithms:
    ax_loss.plot(results[alg]['loss'], label=alg, color=colors[alg], linewidth=2.5, alpha=0.8)

ax_loss.set_title("Training Loss (MSE)")
ax_loss.set_xlabel("Epochs")
ax_loss.set_ylabel("MSE Loss")
ax_loss.legend(loc='upper right')
ax_loss.grid(True, alpha=0.3)

fig.tight_layout()
filename = "Bonus_Optimization_Benchmark_Slow.png"
fig.savefig(filename)
print(f"\n-> Plot saved: {filename}")
plt.show()

# 10. Bonus: Semantic Representation Benchmarking (NLP)
Compares traditional statistical methods (TF-IDF) against modern Deep Learning embeddings (Turkish-BERT and Turkish-E5) on a text classification task.

TF-IDF: Frequency-based statistical vectorization.

BERT: Contextual embeddings from dbmdz/bert-base-turkish-cased.

E5: State-of-the-art semantic embeddings from ytu-ce-cosmos/turkish-e5-large.

In [None]:
# 1. Load Data
print("1. Loading and Processing Text Data...")
try:
    df_train = pd.read_csv("egitim_seti.csv")
    df_test = pd.read_csv("test_seti.csv")
except FileNotFoundError:
    # Only a missing file is reported as "not found"; any other problem
    # (e.g. a missing column below) surfaces with its own traceback.
    # SystemExit is raised directly because `sys` is not imported in this notebook.
    raise SystemExit("ERROR: 'egitim_seti.csv' or 'test_seti.csv' not found.")

# Combine Question ('Soru') and Answer ('Cevap') into one text per row
train_texts = [
    f"Question: {question} Answer: {answer}"
    for question, answer in zip(df_train['Soru'], df_train['Cevap'])
]
test_texts = [
    f"Question: {question} Answer: {answer}"
    for question, answer in zip(df_test['Soru'], df_test['Cevap'])
]

# Targets come from the 'Etiket' column, reshaped into a single column
y_train = torch.FloatTensor(df_train['Etiket'].values).view(-1, 1)
y_test = torch.FloatTensor(df_test['Etiket'].values).view(-1, 1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
y_train = y_train.to(device)
y_test = y_test.to(device)

# Feature tensors of every representation, keyed by model name
results_data = {}

# --- MODEL 1: TF-IDF ---
print("\n>>> Feature Extraction 1: TF-IDF...")
vectorizer = TfidfVectorizer(max_features=2000)

# The vectorizer is fitted on the training texts only and then applied to the test texts
X_train_tfidf = vectorizer.fit_transform(train_texts).toarray()
X_test_tfidf = vectorizer.transform(test_texts).toarray()

results_data['TF-IDF'] = dict(
    train=torch.FloatTensor(X_train_tfidf).to(device),
    test=torch.FloatTensor(X_test_tfidf).to(device),
    dim=X_train_tfidf.shape[1],
)
print(f"   -> TF-IDF Dimension: {results_data['TF-IDF']['dim']}")

# --- MODEL 2: Turkish-BERT ---
print("\n>>> Feature Extraction 2: Turkish-BERT...")
bert_checkpoint = "dbmdz/bert-base-turkish-cased"
bert_tokenizer = AutoTokenizer.from_pretrained(bert_checkpoint)
bert_model = AutoModel.from_pretrained(bert_checkpoint).to(device)

def get_bert_embeddings(text_list):
    """Encode texts with the module-level BERT model, 32 at a time.

    Each batch is tokenized (padded, truncated to 128 tokens), run through
    ``bert_model`` without gradients, and the hidden state at position 0 of
    every sequence is kept.

    Args:
        text_list: List of strings to embed.

    Returns:
        NumPy array with one row per input text (batches stacked vertically).
    """
    batch_size = 32
    stacked_rows = []
    for start in range(0, len(text_list), batch_size):
        encoded = bert_tokenizer(
            text_list[start:start + batch_size],
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=128,
        ).to(device)
        with torch.no_grad():
            hidden_states = bert_model(**encoded).last_hidden_state
        # Position 0 of each sequence serves as the sentence vector
        stacked_rows.append(hidden_states[:, 0, :].cpu().numpy())
    return np.vstack(stacked_rows)

X_train_bert = get_bert_embeddings(train_texts)
X_test_bert = get_bert_embeddings(test_texts)

results_data['BERT'] = dict(
    train=torch.FloatTensor(X_train_bert).to(device),
    test=torch.FloatTensor(X_test_bert).to(device),
    dim=X_train_bert.shape[1],
)
print(f"   -> BERT Dimension: {results_data['BERT']['dim']}")

# --- MODEL 3: Turkish-E5 ---
print("\n>>> Feature Extraction 3: Turkish-E5...")
e5_model = SentenceTransformer('ytu-ce-cosmos/turkish-e5-large')

# Sentence embeddings for the training texts, then the test texts
X_train_e5 = e5_model.encode(train_texts)
X_test_e5 = e5_model.encode(test_texts)

results_data['E5'] = dict(
    train=torch.FloatTensor(X_train_e5).to(device),
    test=torch.FloatTensor(X_test_e5).to(device),
    dim=X_train_e5.shape[1],
)
print(f"   -> E5 Dimension: {results_data['E5']['dim']}")

# --- Training ---
class Classifier(nn.Module):
    """Linear layer (with bias) followed by tanh, producing one score per sample.

    Args:
        input_dim: Dimensionality of the input feature vectors.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=1)
        self.activation = nn.Tanh()

    def forward(self, x):
        """Return ``tanh(linear(x))``."""
        logits = self.linear(x)
        return self.activation(logits)

# Per-representation list of test accuracies, one value per epoch
final_scores = {}

for model_name in ['TF-IDF', 'BERT', 'E5']:
    print(f"Training on: {model_name}...")

    data = results_data[model_name]
    train_features, test_features = data['train'], data['test']

    model = Classifier(data['dim']).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    acc_history = []

    for epoch in range(100):
        # One full-batch gradient step
        model.train()
        optimizer.zero_grad()
        loss = criterion(model(train_features), y_train)
        loss.backward()
        optimizer.step()

        # Test accuracy: sign of the output must equal the target
        model.eval()
        with torch.no_grad():
            preds = torch.sign(model(test_features))
        matches = (preds == y_test).float()
        acc_history.append(matches.mean().item())

    final_scores[model_name] = acc_history
    print(f"   -> {model_name} Final Accuracy: {acc_history[-1]*100:.1f}%")

# --- Plotting ---
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)

colors = {'TF-IDF': 'gray', 'BERT': 'orange', 'E5': 'green'}
styles = {'TF-IDF': '--', 'BERT': '-.', 'E5': '-'}

# One accuracy curve per representation; the legend also shows the final accuracy
for name, scores in final_scores.items():
    curve_label = f"{name} (Final: {scores[-1]*100:.0f}%)"
    ax.plot(scores, label=curve_label, color=colors[name],
            linestyle=styles[name], linewidth=2.5)

ax.axhline(y=0.5, color='red', linestyle=':', label="Random Baseline")
ax.set_title("Comparison of Text Representation Models")
ax.set_xlabel("Epoch")
ax.set_ylabel("Test Accuracy")
ax.set_ylim(0.4, 1.05)
ax.legend()
ax.grid(True, alpha=0.3)

fig.savefig("Bonus_Embedding_Comparison.png")
plt.show()

print("\nPlot saved: Bonus_Embedding_Comparison.png")