**Question 1: SIX DEGREES OF KEVIN BACON**

In [None]:
import csv
from collections import deque
from google.colab import files

# Upload files
uploaded = files.upload()  # Upload people.csv, movies.csv, and stars.csv

# File names (as uploaded)
PEOPLE_CSV = "people.csv"
MOVIES_CSV = "movies.csv"
STARS_CSV = "stars.csv"

# Data structures
names = {}     # name.lower() -> set of person_ids
people = {}    # person_id -> {name, birth, movies}
movies = {}    # movie_id -> {title, year, stars}


def load_data():
    # Load people
    with open(PEOPLE_CSV, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            person_id = row["id"]
            name = row["name"]
            birth = row["birth"]
            people[person_id] = {"name": name, "birth": birth, "movies": set()}
            names.setdefault(name.lower(), set()).add(person_id)

    # Load movies
    with open(MOVIES_CSV, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            movie_id = row["id"]
            title = row["title"]
            year = row["year"]
            movies[movie_id] = {"title": title, "year": year, "stars": set()}

    # Load stars (connections)
    with open(STARS_CSV, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            try:
                person_id = row["person_id"]
                movie_id = row["movie_id"]
                people[person_id]["movies"].add(movie_id)
                movies[movie_id]["stars"].add(person_id)
            except KeyError:
                continue


def person_id_for_name(name):
    person_ids = list(names.get(name.lower(), set()))
    if len(person_ids) == 0:
        return None
    elif len(person_ids) == 1:
        return person_ids[0]
    else:
        print(f"Multiple people found for '{name}':")
        for pid in person_ids:
            person = people[pid]
            print(f"{pid}: {person['name']} ({person['birth']})")
        return input("Enter the ID of the correct person: ").strip()


def neighbors_for_person(person_id):
    neighbors = set()
    for movie_id in people[person_id]["movies"]:
        for co_star in movies[movie_id]["stars"]:
            if co_star != person_id:
                neighbors.add((movie_id, co_star))
    return neighbors


def shortest_path(source, target):
    frontier = deque()
    frontier.append((None, source))
    explored = set()
    parents = {source: (None, None)}  # person_id -> (movie_id, parent_id)

    while frontier:
        movie_id, person_id = frontier.popleft()

        if person_id == target:
            # Reconstruct path
            path = []
            while parents[person_id][1] is not None:
                movie_id, parent_id = parents[person_id]
                path.append((movie_id, person_id))
                person_id = parent_id
            path.reverse()
            return path

        explored.add(person_id)

        for movie, neighbor in neighbors_for_person(person_id):
            if neighbor not in explored and neighbor not in parents:
                parents[neighbor] = (movie, person_id)
                frontier.append((movie, neighbor))

    return None


def main():
    load_data()
    print("Welcome to the 'Six Degrees of Separation' Finder!")
    source_name = input("Enter the name of the source actor: ")
    target_name = input("Enter the name of the target actor: ")

    source = person_id_for_name(source_name)
    if source is None:
        print(f"Actor '{source_name}' not found.")
        return

    target = person_id_for_name(target_name)
    if target is None:
        print(f"Actor '{target_name}' not found.")
        return

    path = shortest_path(source, target)

    if path is None:
        print("No connection found.")
    else:
        print(f"\nShortest connection between {people[source]['name']} and {people[target]['name']}:")
        for i, (movie_id, person_id) in enumerate(path):
            movie = movies[movie_id]["title"]
            actor1 = people[path[i-1][1] if i > 0 else source]["name"]
            actor2 = people[person_id]["name"]
            print(f"{i + 1}: {actor1} and {actor2} appeared in '{movie}'")


main()


Saving movies.csv to movies.csv
Saving people.csv to people.csv
Saving stars.csv to stars.csv
Welcome to the 'Six Degrees of Separation' Finder!
Enter the name of the source actor: Bill Paxton
Enter the name of the target actor: Cary Elwes

Shortest connection between Bill Paxton and Cary Elwes:
1: Bill Paxton and Gary Sinise appeared in 'Apollo 13'
2: Gary Sinise and Robin Wright appeared in 'Forrest Gump'
3: Robin Wright and Cary Elwes appeared in 'The Princess Bride'


**REASONING FOR CHOOSING THIS SEARCH APPROACH**
We used Breadth-First Search (BFS) for the following reasons:

Shortest Path Guarantee: BFS explores level-by-level, ensuring the first time we reach the target actor is via the shortest number of connections.

Unweighted Graph: All actor-to-actor connections (via movies) are treated equally, making BFS the optimal choice.

Efficiency: For small-to-medium datasets like this, BFS is fast and uses manageable memory.

Reconstructable Path: BFS enables easy backtracking to reconstruct the path of connections.

**Question 2: NEURAL NETWORK**

In [1]:
# Step 1: Install required packages
!pip install opencv-python-headless tensorflow scikit-learn

# Step 2: Import libraries
import os
import cv2
import shutil
import zipfile
import numpy as np
from google.colab import files
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Constants
IMG_WIDTH = 30
IMG_HEIGHT = 30
NUM_CATEGORIES = 43
TEST_SIZE = 0.4

# Step 3: Upload dataset ZIP file
print("📁 Please upload Q2_dataset.zip (containing folders 0, 1, ..., 42):")
uploaded = files.upload()

# Step 4: Extract the ZIP file
zip_filename = next(iter(uploaded))
extract_dir = "data"
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

# Step 5: Detect and remove wrapper folder if exists
top_dirs = os.listdir(extract_dir)
if len(top_dirs) == 1 and os.path.isdir(os.path.join(extract_dir, top_dirs[0])):
    wrapper_path = os.path.join(extract_dir, top_dirs[0])
    for subfolder in os.listdir(wrapper_path):
        shutil.move(os.path.join(wrapper_path, subfolder), os.path.join(extract_dir, subfolder))
    shutil.rmtree(wrapper_path)
print("✅ Extracted folders:", os.listdir(extract_dir))

# Step 6: Load and preprocess images
def load_data(data_dir):
    images = []
    labels = []
    for category in range(NUM_CATEGORIES):
        folder_path = os.path.join(data_dir, str(category))
        if not os.path.isdir(folder_path):
            print(f"⚠️ Skipping missing folder: {folder_path}")
            continue
        for filename in os.listdir(folder_path):
            file_path = os.path.join(folder_path, filename)
            try:
                image = cv2.imread(file_path)
                if image is None:
                    continue
                image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
                images.append(image)
                labels.append(category)
            except Exception as e:
                print(f"❌ Error reading {file_path}: {e}")
    return images, labels

images, labels = load_data(extract_dir)
print(f"📸 Loaded {len(images)} images.")

# Step 7: Preprocess for training
images = np.array(images) / 255.0  # Normalize pixel values to [0,1]
labels = to_categorical(labels, NUM_CATEGORIES)

# Step 8: Train-test split
x_train, x_test, y_train, y_test = train_test_split(
    images, labels, test_size=TEST_SIZE, random_state=42
)

# Step 9: Define CNN model
def get_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(NUM_CATEGORIES, activation='softmax'))

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Step 10: Train model
print("🚀 Training model...")
model = get_model()
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))

# Step 11: Evaluate model
print("📊 Evaluating model...")
loss, acc = model.evaluate(x_test, y_test, verbose=2)
print(f"✅ Test Accuracy: {acc:.4f}")

# Step 12: Function to predict single image
def predict_single_image(model, image_path):
    """
    Loads a single image from image_path, preprocesses it,
    and predicts the road sign category using the trained model.

    Args:
        model: Trained keras model for classification.
        image_path: String path to the image file.

    Returns:
        predicted_category: Integer label predicted by the model.
        confidence: Probability score of the predicted class.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image from path: {image_path}")

    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    image = image / 255.0  # normalize
    image = np.expand_dims(image, axis=0)  # add batch dimension

    predictions = model.predict(image)
    predicted_category = np.argmax(predictions)
    confidence = predictions[0][predicted_category]
    return predicted_category, confidence

# Step 13: Upload and predict a single image
print("📸 Upload a single road sign image to classify:")
uploaded_img = files.upload()
img_path = next(iter(uploaded_img.keys()))

category, confidence = predict_single_image(model, img_path)
print(f"Predicted category: {category} with confidence: {confidence:.4f}")

📁 Please upload Q2_dataset.zip (containing folders 0, 1, ..., 42):


Saving Q2_dataset.zip to Q2_dataset (3).zip
✅ Extracted folders: ['23', '14', '16', 'Q2_dataset', '20', '28', '18', '29', '22', '19', '30', '4', '42', '1', '12', '34', '8', '35', '6', '40', '25', '10', '2', '27', '3', '17', '9', '26', '31', '24', '37', '33', '11', '39', '5', '36', '15', '32', '7', '41', '21', '13', '38', '0']
📸 Loaded 26640 images.
🚀 Training model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 50ms/step - accuracy: 0.0927 - loss: 3.4164 - val_accuracy: 0.4182 - val_loss: 1.7246
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 49ms/step - accuracy: 0.4882 - loss: 1.6107 - val_accuracy: 0.8307 - val_loss: 0.5967
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 47ms/step - accuracy: 0.7660 - loss: 0.7205 - val_accuracy: 0.9321 - val_loss: 0.2502
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 49ms/step - accuracy: 0.8663 - loss: 0.4038 - val_accuracy: 0.9717 - val_loss: 0.1241
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 46ms/step - accuracy: 0.9148 - loss: 0.2785 - val_accuracy: 0.9748 - val_loss: 0.0906
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 47ms/step - accuracy: 0.9423 - loss: 0.1806 - val_accuracy: 0.9833 - val_loss: 0.0591
Epoch 7/10
[1m5

Saving images.png to images.png
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
Predicted category: 3 with confidence: 0.9998


**Bonus Question**

Favourite quote:
"You miss 100% of the shots you don't take."