# Set up and initializations

Run this at the start.

In [4]:
import pandas as pd
from IPython.display import display

# Dictionary that maps from abbreviation to feature in table (e.g. r0 = 'White alone')
from dictionary import dictionary as dictt 

# Initializing tables: each CSV under data/ is loaded into its own DataFrame.
# jointP looks a value up in these as table[column][row], where the column is
# the full feature name from dictt and the row is the number of marriages.
maritalStatus = pd.read_csv('data/maritalStatus.csv') 
race = pd.read_csv('data/race.csv') 
nativity = pd.read_csv('data/nativity.csv')   
education = pd.read_csv('data/education.csv') 
employment = pd.read_csv('data/employment.csv') 
income = pd.read_csv('data/income.csv') 
tenure = pd.read_csv('data/tenure.csv')  

# Setting values for percentages of each marriage status.
# mStats is the 'Percentage' column of maritalStatus; it is indexed with the
# number of marriages (0-3) and serves as the prior of each marriage outcome.
# NOTE(review): presumably stored as fractions rather than 0-100 values - confirm.
mStats = maritalStatus['Percentage']

# Helper Methods

`jointP(event, numMarriages)` returns the joint probability of an event and a marriage outcome. It expects the following inputs:
1. `event` is a string abbreviation of a subfeature (e.g. r0 --> 'White Alone')
2. `numMarriages` is an int giving the number of marriages (0 for never, 1 for once, 2 for twice, 3 for three times or more)

`conditionalP(event, numMarriages)` returns the conditional probability of an event given a marriage outcome, i.e. the joint probability divided by the probability of that outcome. It expects the same inputs as `jointP`.

In [5]:
# Returns the joint probability of an event and a marriage outcome
def jointP(event, numMarriages):
    """Return the joint probability of an event and a marriage outcome.

    event is a string abbreviation made of a feature prefix followed by an
    index (e.g. 'r0', 'ed1'); numMarriages is the row to read (0 for never,
    1 for once, 2 for twice, 3 for three times or more).

    The value is read from the table selected by the prefix, at the column
    named dictt[event] and the row numMarriages.

    Raises KeyError if event is not a key of dictt. If the prefix matches no
    table, the message "Unacceptable input. Feature not found." is returned
    (not raised), so callers that do arithmetic on the result should check
    for a string first.
    """
    # Look up feature mapped to event abbreviation in dictionary, dictt
    e = dictt[event]

    # Prefix -> table. Two-letter prefixes are tried before one-letter ones so
    # that the prefix no longer depends on the total length of the
    # abbreviation: 'i10' resolves to income instead of the bogus prefix 'i1'.
    tables = {
        'ed': education,
        'em': employment,
        'r': race,
        'n': nativity,
        'i': income,
        't': tenure,
    }
    table = tables.get(event[:2])
    if table is None:
        table = tables.get(event[:1])
    # Compare against None explicitly: the truth value of a DataFrame is
    # ambiguous and raises.
    if table is None:
        return "Unacceptable input. Feature not found."
    return table[e][numMarriages]
    
# Returns the conditional probability of an event given a marriage outcome
def conditionalP(event, numMarriages):
    """Return P(event | marriage outcome).

    Computed as the joint probability from jointP divided by the share of the
    given marriage outcome in mStats. Takes the same inputs as jointP.
    """
    joint = jointP(event, numMarriages)
    outcomeShare = mStats[numMarriages]
    return joint / outcomeShare


# Prediction

Implements a Naive Bayes classifier that predicts the probabilities of a person having been married never, once, twice, or three or more times, based on their background in race, nativity, education, employment, income, and/or tenure.

In [6]:
from Queue import PriorityQueue

# Prints the probability of each marriage outcome in ASCENDING order, given a person's background
# features is a list of string abbreviations describing that background
def predict(features):
    """Print the Naive Bayes probability of each marriage outcome, lowest first.

    features is a list of string abbreviations accepted by conditionalP
    (e.g. ['r1', 'n0', 'ed1']). For every outcome the prior mStats[k] is
    multiplied by conditionalP(f, k) for each feature f, and the four scores
    are then normalised so that they sum to 1.

    One (probability, label) tuple is printed per outcome, in ascending order
    of probability; the labels are 'never', 'once', 'twice' and 'three'.
    Nothing is returned.
    """
    # Position k in this tuple is the outcome "married k times".
    labels = ('never', 'once', 'twice', 'three')

    # Unnormalised score per outcome: prior times the product of the
    # per-feature conditional probabilities.
    scores = []
    for numMarriages in range(len(labels)):
        score = mStats[numMarriages]
        for f in features:
            score *= conditionalP(f, numMarriages)
        scores.append(score)

    # Normalise so the four scores become probabilities.
    total = sum(scores)

    # Tuples sort by probability first, so a plain sort yields the ascending
    # order; no queue is needed for four items.
    ranked = sorted((score / total, label)
                    for score, label in zip(scores, labels))
    for entry in ranked:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(entry)

predict(['r1', 'n0', 'ed1', 'em2', 'i1', 't0'])        

(0.085651312666160315, 'three')
(0.11885061782676595, 'never')
(0.22958061406237179, 'twice')
(0.56591745544470196, 'once')
