# Module 4: Introduction to Deep Learning

## Learning Objectives

By the end of this module, you should be able to:

- Understand the fundamentals of neural networks
- Build and train deep learning models
- Implement CNNs for image classification
- Apply transfer learning techniques
- Evaluate model performance

## Topics Covered

- Neural network fundamentals
- TensorFlow/Keras or PyTorch basics
- Building simple neural networks for image classification
- Convolutional Neural Networks (CNN) theory
- Transfer learning concepts

## 1. Neural Network Fundamentals

### Perceptron

The perceptron is the simplest form of a neural network.

In [None]:
import numpy as np
import matplotlib.pyplot as plt


# Simple perceptron implementation
class Perceptron:
    """Single-layer perceptron with a step activation.

    Training uses the perceptron rule: after each sample, the weights and
    bias are moved by ``learning_rate * (target - prediction)``.

    Args:
        learning_rate: Step size applied to every weight/bias correction.
        n_iterations: Maximum number of passes (epochs) over the training data.
    """

    def __init__(self, learning_rate=0.1, n_iterations=100):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations

    def activation(self, x):
        """Step function: 1 where ``x >= 0``, otherwise 0."""
        return np.where(x >= 0, 1, 0)

    def fit(self, X, y):
        """Learn ``self.weights`` and ``self.bias`` from the samples in ``X``.

        Args:
            X: 2-D array-like, one row per sample and one column per feature.
            y: 1-D array-like of 0/1 targets, indexed in step with the rows
                of ``X``.
        """
        # Accept plain lists as well as ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)

        # Initialize weights and bias
        self.weights = np.zeros(X.shape[1])
        self.bias = 0

        # Training loop
        for _ in range(self.n_iterations):
            made_correction = False
            for i, features in enumerate(X):
                # Forward pass
                linear_output = np.dot(features, self.weights) + self.bias
                y_predicted = self.activation(linear_output)

                # Update weights and bias (a no-op when the prediction is right)
                update = self.learning_rate * (y[i] - y_predicted)
                self.weights += update * features
                self.bias += update
                if update != 0:
                    made_correction = True

            # An epoch without any correction leaves the parameters untouched,
            # so every further epoch would repeat it exactly: stop early.
            if not made_correction:
                break

    def predict(self, X):
        """Return the 0/1 prediction for each row of ``X``."""
        linear_output = np.dot(X, self.weights) + self.bias
        return self.activation(linear_output)


# Create a simple 2D dataset: the label is 1 when the two features sum to > 0
np.random.seed(42)
X = np.random.randn(100, 2)
y = np.where(X[:, 0] + X[:, 1] > 0, 1, 0)

# Train the perceptron
perceptron = Perceptron(learning_rate=0.1, n_iterations=100)
perceptron.fit(X, y)

# Make predictions (on the training data itself)
predictions = perceptron.predict(X)
accuracy = np.mean(predictions == y)
print(f"Perceptron Accuracy: {accuracy:.2f}")
print(f"Learned weights: {perceptron.weights}")
print(f"Learned bias: {perceptron.bias}")

In [None]:
# Visualize the decision boundary
plt.figure(figsize=(10, 8))

# Plot data points, coloured by class label
scatter = plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', alpha=0.7)
plt.colorbar(scatter)

# Plot decision boundary: the points where w1*x1 + w2*x2 + bias == 0
w_feat1 = perceptron.weights[0]
w_feat2 = perceptron.weights[1]
w_bias = perceptron.bias
x_range = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
boundary_drawn = True
if w_feat2 != 0:
    # General case: solve the boundary equation for feature 2.
    y_range = -(w_feat1 * x_range + w_bias) / w_feat2
    plt.plot(x_range, y_range, 'r-', linewidth=2, label='Decision Boundary')
elif w_feat1 != 0:
    # Feature 2 has zero weight: the boundary is a vertical line, which
    # cannot be written as a function of feature 1 (division by zero).
    plt.axvline(-w_bias / w_feat1, color='r', linewidth=2, label='Decision Boundary')
else:
    # Both weights are zero: the model has no boundary to draw.
    boundary_drawn = False

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Perceptron Decision Boundary')
if boundary_drawn:
    plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## 2. Multi-Layer Perceptron (MLP)

MLPs extend perceptrons with hidden layers.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Synthetic classification problem: 1000 samples with 20 features
# (10 informative, 10 redundant), generated with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=10,
    n_clusters_per_class=1,
    random_state=42,
)

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features; the scaler is fitted on the training split only
# and then re-used, unchanged, on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Wrap the arrays as PyTorch tensors: float features, integer class labels.
X_train_tensor, X_test_tensor = (
    torch.FloatTensor(features) for features in (X_train_scaled, X_test_scaled)
)
y_train_tensor, y_test_tensor = (
    torch.LongTensor(labels) for labels in (y_train, y_test)
)

print(f"Training set shape: {X_train_tensor.shape}")
print(f"Test set shape: {X_test_tensor.shape}")

In [None]:
# Define MLP model
class MLP(nn.Module):
    """Fully connected classifier with two ReLU hidden layers of equal width.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of each of the two hidden layers.
        num_classes: Number of output scores (one per class).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for ``x``; no softmax is applied here."""
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(out))
        return self.fc3(out)


# Seed PyTorch's RNG so the initial weights (and therefore the training
# results) are the same on every run, like the seeded NumPy/scikit-learn steps.
torch.manual_seed(42)

# Initialize model, loss, and optimizer
model = MLP(input_size=20, hidden_size=64, num_classes=2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print("MLP Model Architecture:")
print(model)

In [None]:
# Training loop (full-batch: every step uses the whole training tensor)
num_epochs = 100
train_losses = []

# Put the model in training mode explicitly, so re-running this cell after
# the model has been switched to eval mode still trains in the right mode.
model.train()

for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization; gradients are cleared first because
    # backward() accumulates into them.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Read the scalar loss once and reuse it for the history and the log line.
    loss_value = loss.item()
    train_losses.append(loss_value)

    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value:.4f}')

# Plot training loss
plt.figure(figsize=(10, 6))
plt.plot(train_losses)
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True, alpha=0.3)
plt.show()

In [None]:
# Evaluate the model
model.eval()  # switch the model to evaluation mode
with torch.no_grad():  # no gradients are needed for scoring
    # Training accuracy: the predicted class is the index of the largest score
    train_outputs = model(X_train_tensor)
    train_predicted = train_outputs.argmax(dim=1)
    train_accuracy = (train_predicted == y_train_tensor).sum().item() / len(y_train_tensor)

    # Test accuracy
    test_outputs = model(X_test_tensor)
    test_predicted = test_outputs.argmax(dim=1)
    test_accuracy = (test_predicted == y_test_tensor).sum().item() / len(y_test_tensor)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

## 3. Convolutional Neural Networks (CNNs)

CNNs are specialized neural networks for processing data with grid-like topology, such as images.

In [None]:
# Simple CNN implementation
class SimpleCNN(nn.Module):
    """Two conv/pool stages followed by a two-layer classifier.

    The first fully connected layer expects ``64 * 7 * 7`` features, which is
    what the two 2x2 poolings produce from a single-channel 28x28 input
    (28 -> 14 -> 7). Other input sizes are rejected in ``forward``.

    Args:
        num_classes: Number of output scores (one per class).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 1, 28, 28)``.

        Raises:
            RuntimeError: If the flattened feature size does not match
                ``fc1`` (i.e. the input is not 28x28).
        """
        # Convolutional layers
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))

        # Flatten per sample. Keeping the batch dimension fixed (instead of
        # ``view(-1, 64 * 7 * 7)``) means a wrong input size fails loudly in
        # fc1 rather than being silently folded into extra batch rows.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x


# Conceptual visualization of CNN architecture
print("Simple CNN Architecture:")
print("Input -> Conv1(32 filters) -> ReLU -> MaxPool")
print("      -> Conv2(64 filters) -> ReLU -> MaxPool")
print("      -> Flatten -> FC1(128) -> ReLU -> Dropout")
print("      -> FC2(10) -> Output")

In [None]:
# Visualize what convolution does
def _apply_filter(image, kernel):
    """Slide ``kernel`` over ``image`` and sum the element-wise products.

    Only positions where the kernel fits entirely inside the image are
    evaluated, so the result is smaller than the input by
    ``kernel size - 1`` along each axis. The kernel is applied as given
    (it is not flipped).

    Args:
        image: 2-D array.
        kernel: 2-D array, no larger than ``image`` along either axis.

    Returns:
        2-D float array of filter responses.
    """
    k_rows, k_cols = kernel.shape
    out_rows = image.shape[0] - k_rows + 1
    out_cols = image.shape[1] - k_cols + 1
    response = np.zeros((out_rows, out_cols))
    for i in range(out_rows):
        for j in range(out_cols):
            window = image[i:i + k_rows, j:j + k_cols]
            response[i, j] = np.sum(window * kernel)
    return response


plt.figure(figsize=(15, 5))

# Create a sample image (a simple square)
sample_img = np.zeros((28, 28))
sample_img[10:18, 10:18] = 1

# Simple edge detection filter (vertical edges)
vertical_filter = np.array([[-1, 0, 1],
                            [-1, 0, 1],
                            [-1, 0, 1]])

# Apply convolution manually
filtered_img = _apply_filter(sample_img, vertical_filter)

plt.subplot(1, 3, 1)
plt.imshow(sample_img, cmap='gray')
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 3, 2)
plt.imshow(vertical_filter, cmap='RdBu')
plt.title('Vertical Edge Filter')
plt.axis('off')

plt.subplot(1, 3, 3)
plt.imshow(filtered_img, cmap='gray')
plt.title('Filtered Image')
plt.axis('off')

plt.suptitle('Convolution Operation')
plt.tight_layout()
plt.show()

## 4. Transfer Learning

Transfer learning involves taking a pre-trained model and adapting it for a new task.

In [None]:
# Conceptual example of transfer learning
fig, ax = plt.subplots(1, 1, figsize=(12, 8))


def _label(x, y, text, fontsize, box_color=None, **text_kwargs):
    """Draw centred text at axes-fraction position (x, y), optionally boxed."""
    if box_color is not None:
        text_kwargs['bbox'] = dict(boxstyle="round,pad=0.3", facecolor=box_color)
    ax.text(x, y, text, ha='center', va='center', fontsize=fontsize,
            transform=ax.transAxes, **text_kwargs)


_label(0.5, 0.9, 'Transfer Learning Concept', 16)

# Source model and target task
_label(0.3, 0.7,
       'Pre-trained Model\n(ImageNet dataset)\n\n- Learned generic features\n- Trained on 1000 classes\n- Millions of parameters',
       12, box_color="lightblue")
_label(0.7, 0.7,
       'New Task\n(Custom dataset)\n\n- Fewer classes\n- Limited data\n- Similar domain',
       12, box_color="lightgreen")

# Arrow from the source box to the target box. It is placed in axes-fraction
# coordinates, like every label, so it does not depend on the data limits.
ax.annotate('', xy=(0.7, 0.7), xytext=(0.3, 0.7), xycoords='axes fraction',
            arrowprops=dict(arrowstyle="->", lw=2, color='red'))
_label(0.5, 0.75, 'Transfer Knowledge', 12, color='red')

# The three approaches, left to right
_label(0.5, 0.5, 'Approaches:', 14)
for x_pos, description in (
    (0.25, 'Fine-tuning\n- Train all layers\n- Lower learning rate'),
    (0.5, 'Feature Extraction\n- Freeze early layers\n- Train only classifier'),
    (0.75, 'Hybrid\n- Freeze some layers\n- Fine-tune others'),
):
    _label(x_pos, 0.4, description, 11, box_color="lightyellow")

ax.set_title('Transfer Learning Workflow')
ax.axis('off')
plt.show()

print("Transfer Learning Benefits:")
print("1. Faster training")
print("2. Better performance with limited data")
print("3. Leverages knowledge from large datasets")
print("4. Reduces computational requirements")

## 5. Model Evaluation

Proper evaluation is crucial for understanding model performance.

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Generate sample predictions for evaluation (three classes: 0, 1, 2)
np.random.seed(42)
y_true = np.random.randint(0, 3, 100)
y_pred = np.random.randint(0, 3, 100)

# Ensure some correlation for more realistic results: wherever the random
# draw exceeds 0.3, the prediction is overwritten with the true label.
mask = np.random.random(100) > 0.3
y_pred[mask] = y_true[mask]

# Confusion matrix. The class list is passed explicitly so the matrix always
# has one row/column per class, in the same order as the tick labels, even if
# a class happens to be missing from the sample.
class_ids = [0, 1, 2]
class_names = [f'Class {class_id}' for class_id in class_ids]
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

print("Classification Report:")
print(classification_report(y_true, y_pred))

## Summary

In this module, we've covered:

1. Neural network fundamentals with perceptrons
2. Multi-layer perceptrons for complex classification
3. Convolutional Neural Networks for image processing
4. Transfer learning concepts and approaches
5. Model evaluation techniques

These foundational deep learning concepts prepare us for more advanced computer vision architectures in the next module, where we'll explore state-of-the-art models and specialized techniques.