# Quick Start: Your First MLP Experiment

This notebook gets you started immediately with:
- Generating synthetic data
- Training an MLP classifier
- Exploring parameter space
- Visualizing decision boundaries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Seed NumPy's global random number generator so repeated runs give the same numbers
np.random.seed(seed=42)

# Apply matplotlib's bundled seaborn-like white-grid theme to the figures drawn below
plt.style.use('seaborn-v0_8-whitegrid')

## 1. Generate Synthetic Data

We'll use the `make_moons` dataset - a classic non-linearly separable dataset that's perfect for demonstrating neural network capabilities.

In [None]:
# Draw 1000 noisy samples from the two-moons generator (fixed seed for repeatability)
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)

# Scatter the two classes separately so each gets its own colour and legend entry
fig, ax = plt.subplots(figsize=(10, 6))
for class_id, colour in ((0, 'blue'), (1, 'red')):
    members = y == class_id  # boolean mask selecting the rows of this class
    ax.scatter(X[members, 0], X[members, 1], c=colour, label=f'Class {class_id}', alpha=0.6)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title('Make Moons Dataset')
ax.legend()
plt.show()

# Report the array dimensions and the number of samples per class
print(
    f"Dataset shape: {X.shape}",
    f"Class distribution: {np.bincount(y)}",
    sep="\n",
)

## 2. Preprocess Data

In [None]:
# Split into train and test sets BEFORE fitting any preprocessing, so the
# held-out samples cannot influence the scaling statistics (no data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features (important for neural networks!)
# The scaler learns its mean/std from the training split only and is then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# The full dataset in the same scaled coordinates; later cells use it to draw
# all points on top of the decision boundaries.
X_scaled = scaler.transform(X)

print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

## 3. Train a Simple MLP

In [None]:
# Baseline network: a single hidden layer of 50 ReLU units trained with Adam.
# fit() returns the estimator itself, so construction and training can be chained.
mlp = MLPClassifier(
    hidden_layer_sizes=(50,),
    activation='relu',
    solver='adam',
    alpha=0.001,               # L2 penalty strength
    learning_rate_init=0.001,
    max_iter=500,
    random_state=42,
).fit(X_train, y_train)

# Accuracy on the data the model was fitted on versus the held-out split
train_score = mlp.score(X_train, y_train)
test_score = mlp.score(X_test, y_test)

print(
    f"Training Accuracy: {train_score:.4f}",
    f"Test Accuracy: {test_score:.4f}",
    f"\nNumber of iterations: {mlp.n_iter_}",
    sep="\n",
)

## 4. Visualize Decision Boundary

In [None]:
def plot_decision_boundary(model, X, y, title="Decision Boundary", *, h=0.02, padding=0.5, ax=None):
    """Plot the decision regions of a fitted classifier over two-feature data.

    The classifier is evaluated on a regular grid spanning the data (plus a
    margin), the predicted classes are drawn as filled contours, and the
    samples are scattered on top, coloured by ``y``.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``predict`` for two-column input.
    X : array-like of shape (n_samples, 2)
        Sample coordinates; column 0 is drawn on the x-axis, column 1 on the y-axis.
    y : array-like of shape (n_samples,)
        Class labels used to colour the scattered samples.
    title : str, default="Decision Boundary"
        Title of the plot.
    h : float, default=0.02
        Step size of the prediction mesh. Must be positive.
    padding : float, default=0.5
        Margin added around the data range on every side.
    ax : matplotlib Axes, optional
        Axes to draw into. When omitted, a new 10x6 figure is created and
        shown immediately. When given, nothing is shown here so the caller can
        keep composing the figure.

    Returns
    -------
    None
        Nothing is returned, so a call placed last in a notebook cell does
        not print an object repr.

    Raises
    ------
    ValueError
        If ``h`` is not positive.
    """
    if h <= 0:
        raise ValueError(f"h must be positive, got {h}")

    # Positional column indexing below needs an array, not e.g. a DataFrame
    X = np.asarray(X)

    # Create mesh grid covering the data plus the requested margin
    x_min, x_max = X[:, 0].min() - padding, X[:, 0].max() + padding
    y_min, y_max = X[:, 1].min() - padding, X[:, 1].max() + padding
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Predict on mesh: one prediction per grid node, reshaped back to the grid
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Only create (and later show) a figure when the caller did not supply an Axes
    standalone = ax is None
    if standalone:
        _, ax = plt.subplots(figsize=(10, 6))

    # Plot
    ax.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.RdYlBu)
    points = ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu, edgecolors='black', s=50)
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.set_title(title)
    # Tie the colorbar explicitly to the scattered samples on this Axes
    ax.figure.colorbar(points, ax=ax)

    if standalone:
        plt.show()

plot_decision_boundary(mlp, X_scaled, y, "MLP Decision Boundary (Single Hidden Layer)")

## 5. Explore Parameter Space with Grid Search

In [None]:
# Hyperparameter candidates: 5 architectures x 4 alphas x 2 initial learning rates
param_grid = dict(
    hidden_layer_sizes=[(10,), (50,), (100,), (20, 10), (50, 25)],
    alpha=[0.0001, 0.001, 0.01, 0.1],
    learning_rate_init=[0.001, 0.01],
)

# Template estimator; the grid search fills in the parameters listed above.
# Early stopping is enabled with 10% of the training data set aside for validation.
mlp_gs = MLPClassifier(
    solver='adam',
    activation='relu',
    early_stopping=True,
    validation_fraction=0.1,
    max_iter=500,
    random_state=42,
)

# 5-fold cross-validated search scored by accuracy, using all available cores
grid_search = GridSearchCV(
    estimator=mlp_gs,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
    return_train_score=True,  # keep training scores in cv_results_ as well
)

grid_search.fit(X_train, y_train)

In [None]:
# Winning hyperparameters, one indented "name: value" line each
best_param_lines = [
    f"  {name}: {setting}" for name, setting in grid_search.best_params_.items()
]
print("Best Parameters:", *best_param_lines, sep="\n")

# Cross-validated score of the winner, then its accuracy on the held-out test split
print(
    f"\nBest CV Score: {grid_search.best_score_:.4f}",
    f"Test Score: {grid_search.score(X_test, y_test):.4f}",
    sep="\n",
)

## 6. Visualize Grid Search Results

In [None]:
# Columns kept for the summary: the searched hyperparameters, their scores and rank
summary_columns = [
    'param_hidden_layer_sizes',
    'param_alpha',
    'param_learning_rate_init',
    'mean_test_score',
    'std_test_score',
    'mean_train_score',
    'rank_test_score',
]

# Build the table from the raw search results, trim it, and put the best rank first
results_df = (
    pd.DataFrame(grid_search.cv_results_)
    .loc[:, summary_columns]
    .sort_values(by='rank_test_score')
)

print("Top 10 Parameter Combinations:")
results_df.head(10)

In [None]:
# Plot: Effect of alpha on accuracy for different architectures
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Give every result row a text label for its architecture. Filtering and
# grouping on plain strings avoids depending on how pandas compares or groups
# a column whose values are tuples of different lengths.
scores = results_df.assign(
    architecture=results_df['param_hidden_layer_sizes'].map(str)
)

# Left panel: one line per architecture, mean CV accuracy against alpha
for hl_size in param_grid['hidden_layer_sizes']:
    rows = scores[scores['architecture'] == str(hl_size)]
    by_alpha = rows.groupby('param_alpha')['mean_test_score'].mean()
    # Cast to float so the log-scaled axis always receives numeric x values
    axes[0].plot(by_alpha.index.astype(float), by_alpha.values, marker='o', label=str(hl_size))

axes[0].set_xscale('log')
axes[0].set_xlabel('Alpha (L2 Regularization)')
axes[0].set_ylabel('Mean CV Accuracy')
axes[0].set_title('Effect of Alpha on Accuracy')
axes[0].legend(title='Hidden Layers')

# Right panel: one line per alpha, mean CV accuracy against architecture.
# A single x-axis order (ascending tuple order) is shared by every line.
arch_order = [str(hl) for hl in sorted(param_grid['hidden_layer_sizes'])]
for alpha in param_grid['alpha']:
    rows = scores[scores['param_alpha'] == alpha]
    by_arch = rows.groupby('architecture')['mean_test_score'].mean().reindex(arch_order)
    # Legend label uses the Greek letter alpha (was mis-encoded as "Î±")
    axes[1].plot(arch_order, by_arch.values, marker='s', label=f'α={alpha}')

axes[1].set_xlabel('Hidden Layer Sizes')
axes[1].set_ylabel('Mean CV Accuracy')
axes[1].set_title('Effect of Architecture on Accuracy')
axes[1].legend()
axes[1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 7. Best Model Decision Boundary

In [None]:
# Take the estimator the grid search selected and draw its decision regions,
# putting the winning hyperparameters on the second line of the title
best_model = grid_search.best_estimator_
best_title = f"Best MLP Decision Boundary\n{grid_search.best_params_}"
plot_decision_boundary(best_model, X_scaled, y, title=best_title)

## 8. Compare Architectures Visually

In [None]:
# Train models with different architectures and visualize
architectures = [
    (10,),
    (50,),
    (100,),
    (50, 25),
    (100, 50, 25)
]

# Size the subplot grid from the number of architectures (3 panels per row,
# 5x5 inches per panel) so editing the list above never overflows the grid.
n_cols = 3
n_rows = (len(architectures) + n_cols - 1) // n_cols  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
axes = axes.ravel()

# The prediction mesh depends only on the data, not on the model, so build it once
h = 0.02
x_min, x_max = X_scaled[:, 0].min() - 0.5, X_scaled[:, 0].max() + 0.5
y_min, y_max = X_scaled[:, 1].min() - 0.5, X_scaled[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
mesh_points = np.c_[xx.ravel(), yy.ravel()]

for idx, arch in enumerate(architectures):
    # Train model
    model = MLPClassifier(
        hidden_layer_sizes=arch,
        activation='relu',
        solver='adam',
        alpha=0.001,
        max_iter=500,
        random_state=42
    )
    model.fit(X_train, y_train)

    # Plot decision boundary: predicted class at every mesh node, then all samples on top
    Z = model.predict(mesh_points).reshape(xx.shape)

    axes[idx].contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.RdYlBu)
    axes[idx].scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap=plt.cm.RdYlBu,
                      edgecolors='black', s=20)
    axes[idx].set_title(f'Architecture: {arch}\nAccuracy: {model.score(X_test, y_test):.3f}')

# Hide every subplot that did not receive an architecture
for unused_ax in axes[len(architectures):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

## 9. Loss Curve Analysis

In [None]:
# Two-hidden-layer network whose training loss is inspected below.
# fit() returns the estimator, so it is chained onto the constructor.
mlp_loss = MLPClassifier(
    hidden_layer_sizes=(50, 25),
    activation='relu',
    solver='adam',
    alpha=0.001,
    learning_rate_init=0.001,
    max_iter=500,
    random_state=42,
).fit(X_train, y_train)

# Draw the recorded loss values in the order they were logged during training
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(mlp_loss.loss_curve_, linewidth=2)
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.set_title('MLP Training Loss Curve')
ax.grid(True)
plt.show()

# Last recorded loss and accuracy on the held-out test split
print(
    f"Final Loss: {mlp_loss.loss_curve_[-1]:.6f}",
    f"Test Accuracy: {mlp_loss.score(X_test, y_test):.4f}",
    sep="\n",
)

## Summary

In this quick start, you learned:

1. **Data Generation**: Using `make_moons` to create non-linear classification data
2. **Preprocessing**: Scaling features with `StandardScaler`
3. **MLP Training**: Using `MLPClassifier` with various parameters
4. **Parameter Exploration**: Using `GridSearchCV` for hyperparameter tuning
5. **Visualization**: Decision boundaries and loss curves

### Next Steps

- Proceed to **Notebook 01** for a deeper look at data simulation
- Try different datasets: `make_circles`, `make_classification`
- Experiment with more parameters: `activation`, `solver`