In [31]:
import pandas as pd
import math
import functools

In [2]:
iris = pd.read_csv('datasets/iris.data', header=0, names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'])

iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


In [35]:
example = [
    [2.1, 0.2, 3.0, 'c1'],
    [3.3, 1.0, 2.9, 'c1'],
    [2.7, 1.2, 3.4, 'c1'],
    [0.5, 5.3, 0.0, 'c2'],
    [1.5, 4.7, 0.5, 'c2'],
]

example_df = pd.DataFrame(example, columns=['x1', 'x2', 'x3', 'class'])

In [70]:
def generate_pdf(df, class_label, sigma_sq):
    class_group = df.groupby(['class']).get_group(class_label).iloc[:,:-1]
    print(class_group)
    m = class_group.shape[0]
    k = 1/(m * (sigma_sq**0.5) * (2 * math.pi)**0.5)

    # For every column of the class_group, we need to compute one of the sub-pdf
    sub_pdfs = []
    for (column, data) in class_group.iteritems():
        expressions = []
        for mu in data.values:
            exp = generate_exp(mu, sigma_sq)
            expressions.append(exp)
        sub_pdfs.append(generate_sub_pdf(k, expressions))

    def pdf(x):
        zipped = zip(sub_pdfs, x)
        start = 1
        for sub_pdf, value in zipped:
            print('pdf: ', value, sub_pdf(value))
            start *= sub_pdf(value)
        return start
    return pdf

def generate_exp(mu, sigma_sq):
    def exp(x):
        value = math.e**(-((x-mu)**2/2*sigma_sq))
        print('exp: ', x, mu, sigma_sq, value)
        return value
    return exp

def generate_sub_pdf(k, exps):
    def sub_pdf(x):
        return k * sum([e(x) for e in exps])
    return sub_pdf

In [34]:
setosa_pdf = generate_pdf(iris, 'Iris-setosa', 1)

In [None]:
setosa_pdf()

In [71]:
example_pdf = generate_pdf(example_df, 'c2', 1)
example_pdf([1.4, 3.3, 3.0])

    x1   x2   x3
3  0.5  5.3  0.0
4  1.5  4.7  0.5
exp:  1.4 0.5 1 0.6669768108584745
exp:  1.4 1.5 1 0.9950124791926823
pdf:  1.4 0.3315188986878833
exp:  1.4 0.5 1 0.6669768108584745
exp:  1.4 1.5 1 0.9950124791926823
exp:  3.3 5.3 1 0.1353352832366127
exp:  3.3 4.7 1 0.37531109885139935
pdf:  3.3 0.10185921607446642
exp:  3.3 5.3 1 0.1353352832366127
exp:  3.3 4.7 1 0.37531109885139935
exp:  3.0 0.0 1 0.01110899653824231
exp:  3.0 0.5 1 0.04393693362340743
pdf:  3.0 0.010980074452753275
exp:  3.0 0.0 1 0.01110899653824231
exp:  3.0 0.5 1 0.04393693362340743


0.00037077795551328447