In [None]:
!pip install nestle corner emcee

# Imports

In [None]:
import numpy
import numpy as np
import scipy
import scipy.stats
import pandas
import matplotlib.pyplot as plt
import seaborn as sns

# Read in the data:

In [None]:
# Load the space-delimited data file; its first row supplies the column names.
data = pandas.read_csv(
    "RandomVariable_Generated_Data.dat",
    sep=' ',
    header=0,
)
print(data)

# Some useful helper functions:

In [None]:
def SimpleColorPlotFromFunc(
    Func2D = None,
    xmin = None,
    xmax = None,
    ymin = None,
    ymax = None,
    num_points = 100,
    ):
    """Draw a heat map of a scalar function of two variables on a regular grid.

    Parameters
    ----------
    Func2D : callable
        Called once per grid point with a length-2 array ``[x, y]``; must
        return a scalar.
    xmin, xmax : float
        Range covered along the horizontal (x) axis.
    ymin, ymax : float
        Range covered along the vertical (y) axis.
    num_points : int, optional
        Number of grid samples along each axis (default 100).

    Returns
    -------
    None
        A new matplotlib figure is created and left as the current figure, so
        the caller can add a title / axis labels and call ``plt.show()``.

    Raises
    ------
    TypeError
        If ``Func2D`` is not callable.
    """
    if not callable(Func2D):
        raise TypeError("Func2D must be a callable taking a length-2 point [x, y]")

    # Make the list of points to plug in from the boundaries.
    x = np.linspace(xmin, xmax, num_points)
    y = np.linspace(ymin, ymax, num_points)
    # Default 'xy' indexing: X[i, j] == x[j] and Y[i, j] == y[i], i.e. rows
    # follow y and columns follow x.
    X, Y = np.meshgrid(x, y)
    grid_points = np.column_stack([X.ravel(), Y.ravel()])

    # Evaluate the function one point at a time (Func2D need not be vectorized).
    values = [Func2D(point) for point in grid_points]

    # Back to the grid layout: Z[i, j] == Func2D([x[j], y[i]]).
    Z = np.reshape(values, X.shape)

    # Z already has rows = y and columns = x, so it is drawn as-is with
    # origin='lower' (row 0, i.e. ymin, at the bottom). Rotating Z here would
    # swap the two axes relative to `extent`.
    plt.figure()
    plt.imshow(
        Z,
        origin = 'lower',
        extent = [xmin, xmax, ymin, ymax],
        aspect = 'auto',
        interpolation = None,
        )
    

# Marginal Distributions:

### Make a histogram of the data from column A

In [None]:
# Marginal distribution of column A, drawn as a normalized histogram.
Adata = data['A']
# density=True rescales the bars so the histogram integrates to 1, so the
# y-axis is a probability density rather than a count.
plt.hist(Adata, density=True, bins=50, label='A')
plt.xlabel('A')
plt.ylabel('Probability density')
plt.legend()
plt.show()

### Make a histogram of the data from column B

In [None]:
# Marginal distribution of column B, drawn as a normalized histogram.
Bdata = data['B']
# density=True rescales the bars so the histogram integrates to 1, so the
# y-axis is a probability density rather than a count.
plt.hist(Bdata, density=True, bins=50, label='B')
plt.xlabel('B')
plt.ylabel('Probability density')
plt.legend()
plt.show()

### Approximate the data from column A as a univariate gaussian: (MARGINAL)

In [None]:
# Approximate column A by a univariate Gaussian with the sample mean and
# standard deviation of the data.
A_standard_deviation = numpy.std( Adata )
A_mean = numpy.mean(Adata)

# Evaluate the fitted density on a fixed grid of A values for plotting.
ATrialPoints = numpy.linspace(-20, 20, 100)
AValuePoints = scipy.stats.norm.pdf(ATrialPoints, loc = A_mean, scale = A_standard_deviation )

# density=True puts the histogram on the same probability-density scale as the
# fitted curve, so the y-axis is a density, not a count.
plt.hist(Adata, density=True, bins=50, label='Binned A')
plt.plot(ATrialPoints, AValuePoints, label='Gaussian A')
plt.xlabel('A')
plt.ylabel('Probability density')
plt.legend()
plt.show()

### Approximate the data from column B as a univariate gaussian: (MARGINAL)

In [None]:
# STUDENTS WILL WORK ON THIS CELL
# Approximate column B by a univariate Gaussian with the sample mean and
# standard deviation of the data.
B_standard_deviation = numpy.std( Bdata )
B_mean = numpy.mean(Bdata)

# Evaluate the fitted density on a fixed grid of B values for plotting.
BTrialPoints = numpy.linspace(-20, 20, 100)
BValuePoints = scipy.stats.norm.pdf(BTrialPoints, loc = B_mean, scale = B_standard_deviation )

# density=True puts the histogram on the same probability-density scale as the
# fitted curve, so the y-axis is a density, not a count.
plt.hist(Bdata, density=True, bins=50, label='Binned B')
plt.plot(BTrialPoints, BValuePoints, label='Gaussian B')
plt.xlabel('B')
plt.ylabel('Probability density')
plt.legend()
plt.show()

# Conditional distributions

### (1) Approximate the joint probability density function of A and B with a multivariate gaussian


In [None]:

# Arrange the two columns as one row per observation: column 0 is A, column 1 is B.
ABdata = numpy.array( [ data['A'], data['B'] ] ).T
print('ABdata:', ABdata)
print('ABdata shape:', ABdata.shape)

# Per-column sample mean, i.e. the mean vector (mean of A, mean of B).
ABMean = ABdata.mean( axis = 0 )
print ('ABdata Mean:',ABMean)

# 2x2 sample covariance matrix; rowvar=False because variables are in columns.
ABCovarianceMatrix = numpy.cov( ABdata, rowvar = False )
print ('ABdata Covariance Matrix:',ABCovarianceMatrix)

def jointGaussian(ABpoint):
    """Density at ABpoint = [A, B] of the bivariate Gaussian fitted to the data.

    Uses the module-level ABMean and ABCovarianceMatrix as the mean vector and
    covariance matrix.
    """
    return scipy.stats.multivariate_normal.pdf(
        ABpoint,
        mean = ABMean,
        cov = ABCovarianceMatrix,
    )

# Heat map of the fitted joint density over the observed range of A and B.
SimpleColorPlotFromFunc(
    Func2D = jointGaussian,
    xmin = np.min(Adata),
    xmax = np.max(Adata),
    ymin = np.min(Bdata),
    ymax = np.max(Bdata),
    )
plt.title("Probability Density of A & B", fontsize=30)
plt.xlabel('A',fontsize=40)
plt.ylabel('B',fontsize=40)
plt.draw()
plt.show()

### (1)  Fixing A = 0, plot the unnormalized conditional probability density of B:
P(B|A)
=====


In [None]:
def BdensityConditionalOnA0_unnormalized(Bpoint):
    """Joint Gaussian density evaluated at (A = 0, B = Bpoint).

    Viewed as a function of Bpoint this is proportional to P(B | A = 0); it is
    not divided by the marginal P(A = 0), so it does not integrate to 1.
    """
    return jointGaussian( [0, Bpoint] )

x_values=np.linspace(np.min(Bdata),np.max(Bdata),1000)
# Unnormalized conditional density P(B|A=0) at each B value.
y_values=[BdensityConditionalOnA0_unnormalized(x_value) for x_value in x_values]
# The curve needs a label: plt.legend() only lists labelled artists.
plt.plot(x_values,y_values,label='A = 0')
plt.ylabel('P(B|A)')
plt.xlabel('B')
plt.legend()
plt.show()

### (1) Fixing A = 7 (and, for comparison, A = 1, 3, 5), plot the unnormalized conditional probability density of B:


In [None]:
# STUDENTS WILL WORK ON THIS CELL

# Values of A at which the joint density is sliced.
A_fixed_values=[1,3,5,7]

x_values=np.linspace(np.min(Bdata),np.max(Bdata),1000)
for Afixed in A_fixed_values:
    # Bind the current Afixed as a default argument so the function keeps this
    # value instead of looking up the loop variable at call time.
    def BdensityConditionalOnA_unormalized(Bpoint, Afixed=Afixed):
        """Joint Gaussian density at (A = Afixed, B = Bpoint); proportional to P(B | A = Afixed)."""
        return jointGaussian( [Afixed, Bpoint] )
    # Unnormalized conditional density P(B|A=Afixed) at each B value.
    y_values=[BdensityConditionalOnA_unormalized(x_value) for x_value in x_values]
    # Label each curve so plt.legend() can tell the slices apart.
    plt.plot(x_values,y_values,label='A = {}'.format(Afixed))

plt.ylabel('P(B|A)')
plt.xlabel('B')
plt.legend()
plt.show()


### (1) Fixing B = 0 plot the unnormalized probability density of A:


In [None]:
def AdensityConditionalOnB0_unnormalized(Apoint):
    """Joint Gaussian density evaluated at (A = Apoint, B = 0).

    Viewed as a function of Apoint this is proportional to P(A | B = 0); it is
    not divided by the marginal P(B = 0), so it does not integrate to 1.
    """
    return jointGaussian( [Apoint, 0] )

x_values=np.linspace(np.min(Adata),np.max(Adata),1000)
# Unnormalized conditional density P(A|B=0) at each A value.
y_values=[AdensityConditionalOnB0_unnormalized(x_value) for x_value in x_values]
# The curve needs a label: plt.legend() only lists labelled artists.
plt.plot(x_values,y_values,label='B = 0')

plt.ylabel('P(A|B)')
plt.xlabel('A')
plt.legend()
plt.show()


### (1) Fixing B = 7, plot the unnormalized probability density of A:


In [None]:
def AdensityConditionalOnB7_unnormalized(Apoint):
    """Joint Gaussian density evaluated at (A = Apoint, B = 7).

    Viewed as a function of Apoint this is proportional to P(A | B = 7); it is
    not divided by the marginal P(B = 7), so it does not integrate to 1.
    """
    return jointGaussian( [Apoint, 7] )

x_values=np.linspace(np.min(Adata),np.max(Adata),1000)
# Unnormalized conditional density P(A|B=7) at each A value.
y_values=[AdensityConditionalOnB7_unnormalized(x_value) for x_value in x_values]
# The curve needs a label: plt.legend() only lists labelled artists.
plt.plot(x_values,y_values,label='B = 7')


plt.ylabel('P(A|B)')
plt.xlabel('A')
plt.legend()
plt.show()