In [2]:
from IMLearn.learners import UnivariateGaussian, MultivariateGaussian
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "simple_white"

In [4]:
mean = 10
variance = 1
total_samples = 1000
univariate = UnivariateGaussian()
samples = np.random.normal(mean, variance, size=total_samples)
univariate.fit(samples)
print(f"({univariate.mu_}, {univariate.var_})")


(10.012244335492563, 1.0034439725996647)


In [18]:
samples = np.random.normal(mean, variance, size=total_samples)
# samples = np.random.uniform(0, 20, size=total_samples)
univariate.fit(samples)
print(univariate.log_likelihood(0, univariate.var_, samples))

-52560.613837823534


In [9]:

# Question 2 - Empirically showing sample mean is consistent
estimations = []
sample_sizes = np.arange(10, total_samples + 1, 10)
for n_samples in sample_sizes:
    univariate.fit(samples[:n_samples+1])
    estimations.append(univariate.mu_)

errors = np.abs(np.array(estimations) - mean)
fig = go.Figure([go.Scatter(x=sample_sizes, y=errors, name="Mean")])
fig.update_layout(xaxis_title='# Samples', yaxis_title='Mean', title='Estimated Expectation')
fig.show()

fig = go.Figure([go.Scatter(x=samples, y=univariate.pdf(samples), mode='markers')])
fig.update_layout(xaxis_title='Sample Value', yaxis_title='Density', title='Empirical PDF graph')

fig.show()

In [4]:
# Question 4 - Draw samples and print fitted model
multivariate = MultivariateGaussian()
mean = np.array([0, 0, 4, 0])
cov = np.array([[1, 0.2, 0, 0.5], [0.2, 2, 0, 0], [0, 0, 1, 0], [0.5, 0, 0, 1]])

# n_samples = 1000
# n_means_to_check = 200

n_samples = 100
n_means_to_check = 20

samples = np.random.multivariate_normal(mean, cov, size=n_samples)
multivariate.fit(samples)
print(multivariate.mu_)
print(multivariate.cov_)

# Question 5 - Likelihood evaluation
means_to_check = np.linspace(-10, 10, n_means_to_check)

for i in range(-10, 10, 1):
    print(multivariate.log_likelihood([0, 0, 0, 0], cov, samples))

data = [[multivariate.log_likelihood([f1, 0, f3, 0], cov, samples) for f1 in means_to_check] for f3 in means_to_check]
# multivariate.log_likelihood(, cov, samples)

# print(data)
# fig = go.Figure([go.Heatmap(x=means_to_check, y=means_to_check, z=np.array(data))])
fig.update_layout(xaxis_title='Sample Value', yaxis_title='Density', title='Empirical PDF graph')

fig.show()
# multivariate.log_likelihood()
# raise NotImplementedError()
pass

# Question 6 - Maximum likelihood
pass

[0.1499616  0.19748915 4.04793921 0.02616966]
[[ 0.9695865   0.15094796  0.04045062  0.62905538]
 [ 0.15094796  1.55707377 -0.13206706  0.14826744]
 [ 0.04045062 -0.13206706  0.90771251  0.0312028 ]
 [ 0.62905538  0.14826744  0.0312028   1.21018974]]
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903
-1394.7306159150903


In [5]:
mu = mean
cov_inverse = np.linalg.inv(cov)
X = samples

(m, d) = X.shape
p1 = sum([(X[i] - mu) @ cov_inverse @ (X[i] - mu)])
p2 = m * np.log(np.power(2 * np.pi, d)*np.linalg.det(cov))

print(-0.5*(p1 + p2))
print(multivariate.log_likelihood(mean, cov, samples))

-387.259351994148
-575.5549333408222


In [6]:
multivariate = MultivariateGaussian()
mean = np.array([0, 0, 4, 0])
cov = np.array([[1, 0.2, 0, 0.5], [0.2, 2, 0, 0], [0, 0, 1, 0], [0.5, 0, 0, 1]])

n_samples = 1000
n_means_to_check = 200

In [7]:
samples = np.random.multivariate_normal(mean, cov, size=n_samples)
multivariate.fit(samples)

<IMLearn.learners.gaussian_estimators.MultivariateGaussian at 0x15b0b9910>

In [28]:
means_to_check = np.linspace(-10, 10, n_means_to_check)
data = np.array([[multivariate.log_likelihood([f1, 0, f3, 0], cov, samples) for f3 in means_to_check] for f1 in
                 means_to_check])

In [35]:
max = data.max()
((max_f1_i, max_f3_i),) = np.argwhere(data == max)
max_description = f"Maximum likelihood is: {round(max, 3)}, Achieved at f1={round(means_to_check[max_f1_i], 3)}, f3={round(means_to_check[max_f3_i], 3)}."


fig = go.Figure([go.Heatmap(x=means_to_check, y=means_to_check, z=data)])
fig.update_layout(xaxis_title='Feature 3', yaxis_title='Feature 1',
                  title='Log likelihood of multivariate normal distribution with mean=[f1, 0, f3, 0]<br />'
                        f'<sup>{max_description}</sup>', )
fig.show()

In [36]:
print(data.max())

-5898.898717676345


In [37]:
((max_f1, max_f3),) = np.argwhere(data == data.max())
print(max_f1, max_f3)

139 99


In [38]:
print(means_to_check[89], means_to_check[139])

-1.0552763819095468 3.9698492462311563
