In [3]:
import pandas as pd
import math
import functools

In [4]:
iris = pd.read_csv('datasets/iris.data', header=0, names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'])

iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


In [5]:
example = [
    [2.1, 0.2, 3.0, 'c1'],
    [3.3, 1.0, 2.9, 'c1'],
    [2.7, 1.2, 3.4, 'c1'],
    [0.5, 5.3, 0.0, 'c2'],
    [1.5, 4.7, 0.5, 'c2'],
]

example_df = pd.DataFrame(example, columns=['x1', 'x2', 'x3', 'class'])

In [17]:
def generate_pdf(df, class_label, sigma_sq):
    class_group = df.groupby(['class']).get_group(class_label).iloc[:,:-1]
    print(class_group)
    m = class_group.shape[0]
    k = 1/(m * (sigma_sq**0.5) * (2 * math.pi)**0.5)
    print('k:', k)

    # For every column of the class_group, we need to compute one of the sub-pdf
    sub_pdfs = []
    for (column, data) in class_group.iteritems():
        expressions = []
        for mu in data.values:
            expressions.append(generate_exp(mu, sigma_sq))
        sub_pdfs.append(generate_sub_pdf(k, expressions))

    def pdf(x):
        zipped = zip(sub_pdfs, x)
        start = 1
        for sub_pdf, value in zipped:
            v = sub_pdf(value)
            start *= v
            print('pdf: ', value, v, start)
        return start
    return pdf

def generate_exp(mu, sigma_sq):
    def exp(x):
        value = math.e**(-((x-mu)**2/2*sigma_sq))
        print('exp: ', x, mu, sigma_sq, value)
        return value
    return exp

def generate_sub_pdf(k, exps):
    def sub_pdf(x):
        return k * sum([e(x) for e in exps])
    return sub_pdf

def generate_classifiers(df, class_labels, sigma_sq):
    pdfs = {}
    for label in class_labels:
        pdfs[label] = generate_pdf(df, label, sigma_sq)

    def classifier(inputs):
        ans = ''
        ans_score = 0
        for l, pdf in pdfs.items():
            score = pdf(inputs)
            print('classifier:', l, score)
            if score > ans_score:
                ans = l
                ans_score = score
        return ans

    return classifier

In [10]:
setosa_pdf = generate_pdf(iris, 'Iris-setosa', 1)

    sepal_length  sepal_width  petal_length  petal_width
0            4.9          3.0           1.4          0.2
1            4.7          3.2           1.3          0.2
2            4.6          3.1           1.5          0.2
3            5.0          3.6           1.4          0.2
4            5.4          3.9           1.7          0.4
5            4.6          3.4           1.4          0.3
6            5.0          3.4           1.5          0.2
7            4.4          2.9           1.4          0.2
8            4.9          3.1           1.5          0.1
9            5.4          3.7           1.5          0.2
10           4.8          3.4           1.6          0.2
11           4.8          3.0           1.4          0.1
12           4.3          3.0           1.1          0.1
13           5.8          4.0           1.2          0.2
14           5.7          4.4           1.5          0.4
15           5.4          3.9           1.3          0.4
16           5.1          3.5  

In [12]:
setosa_pdf([2.3, 3.4, 1.6, 0.1])

exp:  2.3 4.9 1 0.03404745473459931
exp:  2.3 4.7 1 0.056134762834133684
exp:  2.3 4.6 1 0.07100535373963703
exp:  2.3 5.0 1 0.026121409853918226
exp:  2.3 5.4 1 0.00818870101437407
exp:  2.3 4.6 1 0.07100535373963703
exp:  2.3 5.0 1 0.026121409853918226
exp:  2.3 4.4 1 0.11025052530448513
exp:  2.3 4.9 1 0.03404745473459931
exp:  2.3 5.4 1 0.00818870101437407
exp:  2.3 4.8 1 0.04393693362340743
exp:  2.3 4.8 1 0.04393693362340743
exp:  2.3 4.3 1 0.1353352832366127
exp:  2.3 5.8 1 0.002187491118182886
exp:  2.3 5.7 1 0.003088715408236767
exp:  2.3 5.4 1 0.00818870101437407
exp:  2.3 5.1 1 0.0198410947443703
exp:  2.3 5.7 1 0.003088715408236767
exp:  2.3 5.1 1 0.0198410947443703
exp:  2.3 5.4 1 0.00818870101437407
exp:  2.3 5.1 1 0.0198410947443703
exp:  2.3 4.6 1 0.07100535373963703
exp:  2.3 5.1 1 0.0198410947443703
exp:  2.3 4.8 1 0.04393693362340743
exp:  2.3 5.0 1 0.026121409853918226
exp:  2.3 5.0 1 0.026121409853918226
exp:  2.3 5.2 1 0.014920786069067831
exp:  2.3 5.2 1 0.014920

0.0008240246462661296

In [9]:
example_pdf = generate_pdf(example_df, 'c1', 1)
example_pdf([1.4, 3.3, 3.0])

    x1   x2   x3
0  2.1  0.2  3.0
1  3.3  1.0  2.9
2  2.7  1.2  3.4
k: 0.1329807601338109
exp:  1.4 2.1 1 0.782704538241868
exp:  1.4 3.3 1 0.16447445657715493
exp:  1.4 2.7 1 0.42955735821073904
pdf:  1.4 0.1830794467297484 0.1830794467297484
exp:  3.3 0.2 1 0.008188701014374092
exp:  3.3 1.0 1 0.07100535373963703
exp:  3.3 1.2 1 0.11025052530448533
pdf:  3.3 0.025192484259409457 0.004612226079960579
exp:  3.0 3.0 1 1.0
exp:  3.0 2.9 1 0.9950124791926823
exp:  3.0 3.4 1 0.9231163463866359
pdf:  3.0 0.38805498939392263 0.0017897973425414757


0.0017897973425414757

In [18]:
example_classifier = generate_classifiers(example_df, ['c1', 'c2'], 1)
example_classifier([1.4, 3.3, 3.0])

    x1   x2   x3
0  2.1  0.2  3.0
1  3.3  1.0  2.9
2  2.7  1.2  3.4
k: 0.1329807601338109
    x1   x2   x3
3  0.5  5.3  0.0
4  1.5  4.7  0.5
k: 0.19947114020071635
exp:  1.4 2.1 1 0.782704538241868
exp:  1.4 3.3 1 0.16447445657715493
exp:  1.4 2.7 1 0.42955735821073904
pdf:  1.4 0.1830794467297484 0.1830794467297484
exp:  3.3 0.2 1 0.008188701014374092
exp:  3.3 1.0 1 0.07100535373963703
exp:  3.3 1.2 1 0.11025052530448533
pdf:  3.3 0.025192484259409457 0.004612226079960579
exp:  3.0 3.0 1 1.0
exp:  3.0 2.9 1 0.9950124791926823
exp:  3.0 3.4 1 0.9231163463866359
pdf:  3.0 0.38805498939392263 0.0017897973425414757
classifier: c1 0.0017897973425414757
exp:  1.4 0.5 1 0.6669768108584745
exp:  1.4 1.5 1 0.9950124791926823
pdf:  1.4 0.3315188986878833 0.3315188986878833
exp:  3.3 5.3 1 0.1353352832366127
exp:  3.3 4.7 1 0.37531109885139935
pdf:  3.3 0.10185921607446642 0.03376825513421825
exp:  3.0 0.0 1 0.01110899653824231
exp:  3.0 0.5 1 0.04393693362340743
pdf:  3.0 0.010980074452753275 

'c1'

In [31]:
iris_labels = iris['class'].unique().tolist()
data = [6.7, 3.9, 4.5, 1.6]
iris_classifier = generate_classifiers(iris, iris_labels, 1)
iris_classifier(data)

    sepal_length  sepal_width  petal_length  petal_width
0            4.9          3.0           1.4          0.2
1            4.7          3.2           1.3          0.2
2            4.6          3.1           1.5          0.2
3            5.0          3.6           1.4          0.2
4            5.4          3.9           1.7          0.4
5            4.6          3.4           1.4          0.3
6            5.0          3.4           1.5          0.2
7            4.4          2.9           1.4          0.2
8            4.9          3.1           1.5          0.1
9            5.4          3.7           1.5          0.2
10           4.8          3.4           1.6          0.2
11           4.8          3.0           1.4          0.1
12           4.3          3.0           1.1          0.1
13           5.8          4.0           1.2          0.2
14           5.7          4.4           1.5          0.4
15           5.4          3.9           1.3          0.4
16           5.1          3.5  

'Iris-versicolor'

In [29]:
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica
