In [6]:
# 1 Standardize your dataset so that each feature has mean = 0 and std = 1 (do it manually).
import math

def standardize_dataset(data):
    """Z-score every feature (column) of a row-major dataset.

    Each value becomes ``(x - mean) / std`` using its column's mean and
    population standard deviation (the variance divides by the number of
    samples, not ``n - 1``).

    Args:
        data: Sequence of rows, each row a sequence of numbers. The number
            of features is taken from the first row.

    Returns:
        A tuple ``(standardized_data, means, stds)``. ``standardized_data``
        is a new list of rows; ``means`` and ``stds`` hold one entry per
        feature. An empty dataset yields ``([], [], [])``.
    """
    n_samples = len(data)
    if n_samples == 0:
        # Nothing to standardize; without this guard data[0] raises IndexError.
        return [], [], []
    n_features = len(data[0])

    # Per-feature statistics.
    means = []
    stds = []
    for j in range(n_features):
        feature_values = [row[j] for row in data]
        mean = sum(feature_values) / n_samples
        variance = sum((x - mean) ** 2 for x in feature_values) / n_samples
        means.append(mean)
        stds.append(math.sqrt(variance))

    # A constant feature has std == 0; map it to 0.0 (a float, like every
    # other output value) instead of dividing by zero.
    standardized_data = [
        [
            (row[j] - means[j]) / stds[j] if stds[j] != 0 else 0.0
            for j in range(n_features)
        ]
        for row in data
    ]

    return standardized_data, means, stds


# Toy dataset: three samples, two features.
data = [[1, 200], [2, 300], [3, 400]]

# Unpack the standardized rows together with the per-feature statistics.
standardized_data, means, stds = standardize_dataset(data)

# Report the statistics first, then one standardized sample per line.
print("Means:", means)
print("Stds:", stds)
print("Standardized Data:")
print(*standardized_data, sep="\n")


Means: [2.0, 300.0]
Stds: [0.816496580927726, 81.64965809277261]
Standardized Data:
[-1.224744871391589, -1.224744871391589]
[0.0, 0.0]
[1.224744871391589, 1.224744871391589]


In [9]:
# 2 Center the data by subtracting feature means and print the centered matrix.

def center_data(data):
    """Subtract each feature's mean from every sample.

    Args:
        data: Sequence of rows, each row a sequence of numbers. The number
            of features is taken from the first row.

    Returns:
        A tuple ``(centered_data, feature_means)``. ``centered_data`` is a
        new list of rows and ``feature_means`` holds one mean per feature.
        An empty dataset yields ``([], [])``.
    """
    n_samples = len(data)
    if n_samples == 0:
        # Nothing to center; without this guard data[0] raises IndexError.
        return [], []
    n_features = len(data[0])

    # Mean of each column.
    feature_means = [
        sum(row[j] for row in data) / n_samples
        for j in range(n_features)
    ]

    # Shift every value by its column mean.
    centered_data = [
        [row[j] - feature_means[j] for j in range(n_features)]
        for row in data
    ]

    return centered_data, feature_means


# Toy dataset: three samples, two features.
data = [[1, 4], [2, 5], [3, 6]]

# Centered rows plus the per-feature means that were subtracted.
centered_matrix, means = center_data(data)

print("Feature Means:", means)
print("Centered Data:")
print(*centered_matrix, sep="\n")


Feature Means: [2.0, 5.0]
Centered Data:
[-1.0, -1.0]
[0.0, 0.0]
[1.0, 1.0]


In [10]:
# 3 Compute the covariance matrix from scratch (no library function).

def covariance_matrix(data, ddof=1):
    """Compute the feature-by-feature covariance matrix of a dataset.

    Args:
        data: Sequence of rows (samples), each row a sequence of numbers.
            The number of features is taken from the first row.
        ddof: Delta degrees of freedom; the sums of products are divided by
            ``n_samples - ddof``. The default of 1 gives the sample
            covariance; pass 0 for the population covariance.

    Returns:
        A list of ``n_features`` rows, each with ``n_features`` floats,
        where entry ``[i][j]`` is the covariance of features ``i`` and ``j``.

    Raises:
        ValueError: If ``n_samples - ddof`` is not positive (e.g. an empty
            dataset, or a single sample with the default ``ddof``), since
            the divisor would be zero or negative.
    """
    n_samples = len(data)
    divisor = n_samples - ddof
    if divisor <= 0:
        raise ValueError(
            f"covariance needs more than {ddof} sample(s), got {n_samples}"
        )
    n_features = len(data[0])

    # Step 1: Compute means
    means = [
        sum(row[j] for row in data) / n_samples
        for j in range(n_features)
    ]

    # Step 2: Center the data
    centered = [
        [row[j] - means[j] for j in range(n_features)]
        for row in data
    ]

    # Step 3: Compute covariance matrix. cov[i][j] == cov[j][i], so each
    # pair is accumulated once and mirrored across the diagonal.
    cov_matrix = [[0.0] * n_features for _ in range(n_features)]
    for i in range(n_features):
        for j in range(i, n_features):
            total = 0
            for row in centered:
                total += row[i] * row[j]
            value = total / divisor
            cov_matrix[i][j] = value
            cov_matrix[j][i] = value

    return cov_matrix

# Toy dataset: three samples, two features.
data = [[1, 4], [2, 5], [3, 6]]

# Covariance of the two features, computed by the from-scratch routine.
cov = covariance_matrix(data)

print("Covariance Matrix:")
print(*cov, sep="\n")


Covariance Matrix:
[1.0, 1.0]
[1.0, 1.0]


In [12]:
# 4 Verify that your covariance matrix equals numpy.cov() output.

import numpy as np

# Original data
data = [[1, 4], [2, 5], [3, 6]]

# Covariance from the hand-written implementation.
manual_cov = covariance_matrix(data)

# Reference result from NumPy; rowvar=False treats columns as the variables.
np_data = np.array(data)
numpy_cov = np.cov(np_data, rowvar=False)

# Show both matrices.
print("Manual Covariance Matrix:")
print(*manual_cov, sep="\n")

print("\nNumPy Covariance Matrix:")
print(numpy_cov)

# Element-wise comparison within np.allclose's default tolerances.
print("\nAre both matrices equal?")
print(np.allclose(manual_cov, numpy_cov))


Manual Covariance Matrix:
[1.0, 1.0]
[1.0, 1.0]

NumPy Covariance Matrix:
[[1. 1.]
 [1. 1.]]

Are both matrices equal?
True


In [14]:
# 5 Print the pair of features having the maximum covariance value.

def max_covariance_pair(cov_matrix):
    """Locate the largest off-diagonal entry of a square matrix.

    Entries are scanned row by row and only a strictly larger value replaces
    the current best, so the first occurrence of the maximum wins.

    Args:
        cov_matrix: Square matrix given as a list of rows.

    Returns:
        A tuple ``(feature_pair, max_cov)`` where ``feature_pair`` is the
        ``(row, column)`` index pair of the largest off-diagonal value and
        ``max_cov`` is that value. When there are no off-diagonal entries
        the result is ``(None, float('-inf'))``.
    """
    size = len(cov_matrix)
    best_pair = None
    best_value = float('-inf')

    for r in range(size):
        for c in range(size):
            if r == c:
                # Diagonal entries are variances, not pairwise covariances.
                continue
            candidate = cov_matrix[r][c]
            if candidate > best_value:
                best_value, best_pair = candidate, (r, c)

    return best_pair, best_value

# Sample data
data = [[1, 4], [2, 5], [3, 6]]

# Build the covariance matrix, then pick its largest off-diagonal entry.
cov_matrix = covariance_matrix(data)
pair, value = max_covariance_pair(cov_matrix)

print("Covariance Matrix:")
print(*cov_matrix, sep="\n")

print("\nFeature pair with maximum covariance:")
print(f"Feature {pair[0]} and Feature {pair[1]}")
print("Covariance value:", value)


Covariance Matrix:
[1.0, 1.0]
[1.0, 1.0]

Feature pair with maximum covariance:
Feature 0 and Feature 1
Covariance value: 1.0


In [16]:
# 6 Save your covariance matrix to a CSV file using Python code.

import csv

def save_covariance_to_csv(cov_matrix, filename):
    """Write a matrix to ``filename`` as CSV, one matrix row per line.

    The file is opened in write mode, so existing content is replaced.

    Args:
        cov_matrix: Iterable of rows, each row an iterable of values.
        filename: Path of the CSV file to write.
    """
    # newline='' lets the csv module control line endings itself.
    with open(filename, mode='w', newline='') as csv_file:
        csv.writer(csv_file).writerows(cov_matrix)


# Save to CSV (uses the cov_matrix computed in the previous cell)
save_covariance_to_csv(
    cov_matrix,
    "covariance_matrix.csv",
)

print("Covariance matrix saved to covariance_matrix.csv")


Covariance matrix saved to covariance_matrix.csv
