<img src="images/ublogo.png"/>

### CSE610 - Bayesian Non-parametric Machine Learning

  - Lecture Notes
  - Instructor - Varun Chandola
  - Term - Fall 2020

In [None]:
import numpy as np
from scipy.stats import multivariate_normal as mvn
from scipy.stats import zscore
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
import GPy
import time
from sklearn.datasets import make_classification, openml
from scipy.spatial.distance import pdist,squareform,cdist
sns.set(color_codes=True)
sns.set_style('whitegrid')
sns.set_context('paper',font_scale=2)
from mpl_toolkits import mplot3d
import plotly.graph_objects as go
%config InlineBackend.print_figure_kwargs = {'bbox_inches':None}
%matplotlib inline

In [None]:
def bayesPosterior(X,y,sigmansq,Sigmap):
    """Return the Gaussian posterior over linear-regression weights.

    Forms ``A = (1/sigmansq) * X X^T + Sigmap^{-1}`` and returns the
    posterior mean ``(1/sigmansq) * A^{-1} X y`` together with ``A^{-1}``.

    Parameters
    ----------
    X : ndarray
        Design matrix. It is used as ``X X^T`` and ``X y``, so it is
        presumably laid out as (features, samples) -- confirm with callers.
    y : ndarray
        Targets, combined with ``X`` as ``X y``.
    sigmansq : float
        Noise variance; enters only through ``1/sigmansq``.
    Sigmap : ndarray
        Prior covariance of the weights; it is inverted here.

    Returns
    -------
    tuple
        ``(wpost, Ainv)`` -- posterior mean and posterior covariance.
    """
    noise_precision = 1/sigmansq
    gram = np.dot(X,X.T)
    prior_precision = np.linalg.inv(Sigmap)

    # Posterior covariance is the inverse of the summed precisions.
    Ainv = np.linalg.inv(noise_precision*gram + prior_precision)

    projected_targets = np.dot(X,y)
    wpost = noise_precision*np.dot(Ainv,projected_targets)
    return wpost,Ainv
    

In [None]:
def plot_contours(ax,mean,cov,limits=(-10,10),cm='Reds',levels=6):
    """Draw filled and labelled contours of a 2-D Gaussian density on ``ax``.

    The density is evaluated on a 100 x 100 grid spanning ``limits`` along
    both coordinates.

    Parameters
    ----------
    ax : object
        Target axes; its ``contourf``, ``contour`` and ``clabel`` methods
        are called (e.g. a matplotlib Axes).
    mean : array_like
        Mean of the Gaussian (two components).
    cov : array_like
        Covariance of the Gaussian.
    limits : tuple
        ``(low, high)`` range used for both grid axes.
    cm : str
        Colormap passed to ``contourf``.
    levels : int or sequence
        Contour levels passed to both ``contourf`` and ``contour``.
    """
    w1s = w2s = np.linspace(limits[0],limits[1],100)
    W1,W2 = np.meshgrid(w1s,w2s)

    # mvn.pdf treats the last axis as the component axis, so stacking the
    # two coordinate grids evaluates every grid point in a single call
    # instead of 10,000 separate ones; the result already has W1's shape.
    pdfs = mvn.pdf(np.dstack((W1,W2)),mean,cov)

    ax.contourf(W1, W2, pdfs, levels=levels,cmap=cm)
    cset = ax.contour(W1, W2, pdfs, levels=levels, colors='k',alpha=0.6)
    ax.clabel(cset, inline=True, fontsize=10)

In [None]:
'''
Copied from: https://github.com/krasserm/bayesian-machine-learning/
'''
def plot_gp(mu, cov, X, X_train=None, Y_train=None, samples=(),legend=True):
    """Plot a GP mean with its uncertainty band on the current figure.

    Parameters
    ----------
    mu : ndarray
        Mean values at ``X``; flattened before plotting.
    cov : ndarray
        Covariance matrix at ``X``; only its diagonal is used.
    X : ndarray
        Input locations; flattened before plotting.
    X_train, Y_train : array_like, optional
        Training points, drawn as red crosses when ``X_train`` is given.
    samples : iterable, optional
        Function draws evaluated at ``X``; each is drawn as a dashed line.
    legend : bool
        Whether to add a legend.
    """
    X = X.ravel()
    mu = mu.ravel()

    # Round-off in a numerically computed covariance can leave tiny negative
    # values on the diagonal; clamp them so the square root is not NaN.
    variance = np.maximum(np.diag(cov), 0)
    # 1.96 standard deviations: two-sided 95% interval of a normal.
    uncertainty = 1.96 * np.sqrt(variance)

    plt.fill_between(X, mu + uncertainty, mu - uncertainty, alpha=0.1)
    plt.plot(X, mu, label='Mean')
    for i, sample in enumerate(samples):
        plt.plot(X, sample, lw=1, ls='--', label=f'Sample {i+1}')
    if X_train is not None:
        plt.plot(X_train, Y_train, 'rx')
    if legend:
        plt.legend(ncol=3)

In [None]:
def load_mauna_loa_atmospheric_co2():
    """Fetch OpenML dataset 41187 and return its target averaged per month.

    Columns 0 and 1 of the data are combined as ``col0 + (col1 - 1) / 12``
    to form a fractional time stamp (presumably year and month -- confirm
    against the dataset description). Consecutive rows sharing a stamp are
    averaged; rows with equal stamps are only merged when adjacent.

    Returns
    -------
    months : ndarray, shape (n_months, 1)
        Fractional time stamp of each aggregated month.
    avg_ppmvs : ndarray, shape (n_months,)
        Mean target value for each month.
    """
    # Local import of the public entry point: the `sklearn.datasets.openml`
    # submodule is not importable in current scikit-learn releases.
    from sklearn.datasets import fetch_openml

    # as_frame=False keeps data/target as NumPy arrays so the positional
    # column slicing below works (the 'auto' default may return a DataFrame).
    ml_data = fetch_openml(data_id=41187, as_frame=False)
    months = []
    ppmv_sums = []
    counts = []

    y = np.asarray(ml_data.data[:, 0], dtype=float)
    m = np.asarray(ml_data.data[:, 1], dtype=float)
    month_float = y + (m - 1) / 12
    ppmvs = np.asarray(ml_data.target, dtype=float)

    for month, ppmv in zip(month_float, ppmvs):
        if not months or month != months[-1]:
            # first reading of a new month starts a fresh accumulator
            months.append(month)
            ppmv_sums.append(ppmv)
            counts.append(1)
        else:
            # aggregate monthly sum to produce average
            ppmv_sums[-1] += ppmv
            counts[-1] += 1

    months = np.asarray(months).reshape(-1, 1)
    avg_ppmvs = np.asarray(ppmv_sums) / counts
    return months, avg_ppmvs


