In [34]:
import pandas as pd
import numpy as np
# Raise pandas' display width so wide DataFrames print on one line instead of wrapping.
pd.set_option('display.width', 1000)

In [35]:
# Load the forest-fires dataset; the relative path is resolved against the
# kernel's current working directory.
forest_fires = pd.read_csv('forestfires.csv')

# Preview the first five rows to confirm the columns were parsed as expected.
print(forest_fires.head())

   X  Y month  day  FFMC   DMC     DC  ISI  temp  RH  wind  rain  area
0  7  5   mar  fri  86.2  26.2   94.3  5.1   8.2  51   6.7   0.0   0.0
1  7  4   oct  tue  90.6  35.4  669.1  6.7  18.0  33   0.9   0.0   0.0
2  7  4   oct  sat  90.6  43.7  686.9  6.7  14.6  33   1.3   0.0   0.0
3  8  6   mar  fri  91.7  33.3   77.5  9.0   8.3  97   4.0   0.2   0.0
4  8  6   mar  sun  89.3  51.3  102.2  9.6  11.4  99   1.8   0.0   0.0


In [170]:
def compute_multi_dimensional_mean(data: np.ndarray) -> np.ndarray:
    """Return the mean of ``data`` taken along its first axis.

    For a 2-D array whose rows are observations, this yields one mean per
    column.

    Args:
        data: Non-empty NumPy array.

    Returns:
        The sum over axis 0 divided by the length of axis 0.

    Raises:
        ValueError: If ``data`` contains no elements.
    """
    if not data.size:
        raise ValueError("Input data is empty")

    row_count = data.shape[0]
    column_totals = data.sum(axis=0)
    return column_totals / row_count


In [171]:
# Sanity check: the column means of a 3x3 grid holding 1..9 are 4, 5 and 6.
data = np.array([[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]])

expected_mean = np.array([4., 5., 6.])
calculated_output = compute_multi_dimensional_mean(data)

print(calculated_output)
# Exact equality works here because every expected mean is a whole number.
assert np.array_equal(expected_mean, calculated_output), "Mean calculation incorrect"
print("Multi dimensional mean correct")

[4. 5. 6.]
Multi dimensional mean correct


In [141]:
def compute_sample_covariance(attribute1, attribute2):
    """Compute the unbiased sample covariance of two equal-length sequences.

    The result is ``sum((x - mean(x)) * (y - mean(y))) / (n - 1)``.

    Args:
        attribute1: 1-D array-like of numbers (list, tuple, ndarray, Series).
        attribute2: 1-D array-like of numbers with the same length.

    Returns:
        The sample covariance as a float. ``nan`` is returned when fewer than
        two observations are supplied, because the ``n - 1`` denominator
        makes the statistic undefined in that case.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    if len(attribute1) != len(attribute2):
        raise ValueError("Input vectors must have the same length")

    # Convert up front so plain Python sequences support the element-wise
    # arithmetic below (list - float would raise TypeError).
    x = np.asarray(attribute1)
    y = np.asarray(attribute2)

    n = len(x)
    if n < 2:
        # Avoid dividing by zero (n == 1) or by -1 (n == 0).
        return np.nan

    return np.sum((x - np.mean(x)) * (y - np.mean(y))) / (n - 1)

In [168]:
# Two sequences that move in exactly opposite directions: the products of the
# deviations sum to -10, and dividing by n - 1 = 4 gives -2.5.
attribute1 = np.array([1, 2, 3, 4, 5])
attribute2 = np.array([5, 4, 3, 2, 1])

expected_covariance = -2.5
calculated_output = compute_sample_covariance(attribute1, attribute2)

print(calculated_output)
# Compare with a tolerance because the result is a floating-point value.
assert np.isclose(expected_covariance, calculated_output), "Covariance calculation incorrect"
print("Covariance calculation correct")

-2.5
Covariance calculation correct


In [143]:
def compute_correlation(attribute1, attribute2):
    """Compute the Pearson correlation coefficient of two sequences.

    The coefficient is the sum of the products of the deviations from the
    mean, divided by the product of the square roots of the summed squared
    deviations. The ``1 / (n - 1)`` factors of covariance and standard
    deviation cancel, so they are never applied.

    Args:
        attribute1: 1-D array-like of numbers (list, tuple, ndarray, Series).
        attribute2: 1-D array-like of numbers with the same length.

    Returns:
        The correlation as a float. ``nan`` is returned when the coefficient
        is undefined: empty input, or either input having zero variance.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    if len(attribute1) != len(attribute2):
        raise ValueError("Input vectors must have the same length")

    # Convert up front so plain Python sequences support the element-wise
    # arithmetic below (list - float would raise TypeError).
    x = np.asarray(attribute1)
    y = np.asarray(attribute2)
    if x.size == 0:
        return np.nan

    deviations1 = x - np.mean(x)
    deviations2 = y - np.mean(y)

    cross_deviation_sum = np.sum(deviations1 * deviations2)
    root_sum_squares1 = np.sqrt(np.sum(deviations1 ** 2))
    root_sum_squares2 = np.sqrt(np.sum(deviations2 ** 2))

    denominator = root_sum_squares1 * root_sum_squares2
    if denominator == 0:
        # A constant input has no spread; return nan explicitly instead of
        # reaching it through a 0 / 0 division and its RuntimeWarning.
        return np.nan

    return cross_deviation_sum / denominator

In [167]:
# Two sequences that move in exactly opposite directions, so the expected
# correlation is -1.
attribute1 = np.array([1, 2, 3, 4, 5])
attribute2 = np.array([5, 4, 3, 2, 1])

expected_correlation = -1.0
calculated_output = compute_correlation(attribute1, attribute2)

print(calculated_output)
# A tolerance is required: floating-point rounding makes the computed value
# fall a hair short of -1.0 (see the printed output).
assert np.isclose(expected_correlation, calculated_output), "Correlation calculation incorrect"
print("Correlation calculation correct")

-0.9999999999999998
Correlation calculation correct


In [145]:
def range_normalize_matrix(matrix):
    """Min-max scale each column of a 2-D array.

    Every column is mapped to ``(value - column_min) / (column_max - column_min)``.
    A column whose minimum equals its maximum is filled with zeros.

    Args:
        matrix: 2-D NumPy array with observations in rows and variables in
            columns.

    Returns:
        A float array of the same shape holding the scaled values.
    """
    column_lows = np.min(matrix, axis=0)
    column_highs = np.max(matrix, axis=0)
    # Only columns that actually vary are rescaled; the rest keep their zeros.
    varying = column_lows != column_highs

    scaled = np.zeros_like(matrix, dtype=float)
    scaled[:, varying] = (matrix[:, varying] - column_lows[varying]) / (
        column_highs[varying] - column_lows[varying]
    )
    return scaled
                

In [166]:
# Each column spans an evenly spaced range (e.g. 1, 4, 7), so min-max scaling
# should map every column to 0, 0.5 and 1.
array = np.array([[1,2,3],[4,5,6],[7,8,9]])

expected_output = np.array([[0. , 0., 0. ],
[0.5, 0.5, 0.5],
[1. , 1., 1. ]])

calculated_output = range_normalize_matrix(array)

print(calculated_output)
# Compare with a tolerance because the result is a floating-point array.
assert np.allclose(expected_output, calculated_output), "Range normalization calculation incorrect"
print("Range normalization calculation correct")

[[0.  0.  0. ]
 [0.5 0.5 0.5]
 [1.  1.  1. ]]
Range normalization calculation correct


In [147]:
def standard_normalize_matrix(matrix):
    """Standardise (z-score) each column of a 2-D array.

    Every column is mapped to ``(value - column_mean) / column_std``, where
    the standard deviation is the population one (``np.std`` default,
    ``ddof=0``). A constant column has zero spread, so it is filled with
    zeros rather than divided by zero; this mirrors how
    ``range_normalize_matrix`` treats constant columns.

    Args:
        matrix: 2-D NumPy array with observations in rows and variables in
            columns.

    Returns:
        A float array of the same shape holding the standardised values.
    """
    normalized_matrix = np.zeros_like(matrix, dtype=float)

    for i in range(matrix.shape[1]):
        column = matrix[:, i]

        # Detect constant columns by min == max rather than std == 0:
        # rounding in the mean can leave a tiny non-zero std for constant
        # floating-point data.
        if np.min(column) == np.max(column):
            continue  # keep the zeros this column was initialised with

        mean_val = np.mean(column)
        std_val = np.std(column)
        normalized_matrix[:, i] = (column - mean_val) / std_val

    return normalized_matrix
                

In [165]:
# Each column has values spaced 3 apart around its mean, and the population
# standard deviation of such a column is sqrt(6), so the expected z-scores
# are -3/sqrt(6), 0 and 3/sqrt(6) (about +/-1.2247).
array = np.array([[1,2,3],[4,5,6],[7,8,9]])

expected_output = np.array([[ -1.22474487, -1.22474487, -1.22474487],
 [  0.        ,  0.        ,  0.        ],
 [  1.22474487,  1.22474487,  1.22474487]]
)

calculated_output = standard_normalize_matrix(array)


print(calculated_output)
# Compare with a tolerance because the expected values are rounded decimals.
assert np.allclose(expected_output, calculated_output), "Standard normalization calculation incorrect"
print("Standard normalization calculation correct")

[[-1.22474487 -1.22474487 -1.22474487]
 [ 0.          0.          0.        ]
 [ 1.22474487  1.22474487  1.22474487]]
Standard normalization calculation correct


In [149]:
def compute_covariance_matrix(matrix):
    """Build the sample covariance matrix of the columns of a 2-D array.

    Entry ``[i, j]`` is ``compute_sample_covariance`` applied to columns
    ``i`` and ``j``; the diagonal therefore holds each column's sample
    variance.

    Args:
        matrix: 2-D NumPy array with observations in rows and variables in
            columns.

    Returns:
        A float array of shape ``(num_variables, num_variables)``.
    """
    num_variables = matrix.shape[1]
    covariance_matrix = np.zeros((num_variables, num_variables))

    for i in range(num_variables):
        # cov(i, j) == cov(j, i), so compute only the upper triangle
        # (diagonal included) and mirror each value across the diagonal.
        for j in range(i, num_variables):
            covariance_ij = compute_sample_covariance(matrix[:, i], matrix[:, j])
            covariance_matrix[i, j] = covariance_ij
            covariance_matrix[j, i] = covariance_ij

    return covariance_matrix

In [164]:
# Every column has the same deviations from its mean (-3, 0, 3), so every
# variance and every pairwise covariance equals (9 + 0 + 9) / 2 = 9.
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) 


expected_output = [[9., 9., 9.],
 [9., 9., 9.],
 [9., 9., 9.]]


calculated_output = compute_covariance_matrix(data)

print(calculated_output)
# Compare with a tolerance because the result is a floating-point array.
assert np.allclose(expected_output, calculated_output), "Covariance matrix calculation incorrect"
print("Covariance matrix calculation correct")

[[9. 9. 9.]
 [9. 9. 9.]
 [9. 9. 9.]]
Covariance matrix calculation correct


In [159]:
def label_encode_matrix(matrix):
    """Replace the values of each column of a 2-D array with integer labels.

    Columns are encoded independently. Within a column, labels start at 0 and
    are handed out in order of first appearance, so equal values share a
    label.

    Args:
        matrix: 2-D NumPy array whose elements are hashable (e.g. strings or
            numbers).

    Returns:
        An integer array of the same shape holding the labels.
    """
    encoded = np.zeros_like(matrix, dtype=int)

    for col_idx in range(matrix.shape[1]):
        codes = {}
        for row_idx, item in enumerate(matrix[:, col_idx]):
            # len(codes) is evaluated before any insertion, so an unseen
            # value receives the next unused label.
            encoded[row_idx, col_idx] = codes.setdefault(item, len(codes))

    return encoded

In [163]:
# Check label encoding on a small matrix of strings: labels are assigned per
# column in order of first appearance, starting from 0.
array = np.array([['A', 'B', 'C'],
                  ['D', 'E', 'C'],
                  ['A', 'E', 'I']])

calculated_output = label_encode_matrix(array)

expected_output = [[0, 0, 0],
 [1, 1, 0],
 [0, 1, 1]]

print(calculated_output)
# Labels are discrete integers, so compare exactly rather than with a tolerance.
assert np.array_equal(expected_output, calculated_output), "Label encoding incorrect"
print("Label encoding correct")

[[0 0 0]
 [1 1 0]
 [0 1 1]]
Covariance matrix calculation correct
