In [2]:
"""
Import + test/train sets to NumPy Arrays
"""
# Import libraries & data - Set up training/test set ndarrays
import pandas as pd
import numpy as np

df_train = pd.read_csv("/Users/FATMac/Downloads/titanic_train.csv")
df_test = pd.read_csv("/Users/FATMac/Downloads/titanic_test.csv")

train_set = df_train.values
test_set = df_test.values

In [3]:
"""
Data Wrangling
Data should be reshaped as m x n design matrix.
Examples are row vectors (qty m)
Features are column vectors (qty N = n - 1)
"""
# Extract single feature examples ("Pclass") 
train = train_set[:,[2, 5, 1]].astype(float)
test = test_set[:,[1, 4]].astype(float)

#if(train.ndim == 1):
#    train = train[:, np.newaxis]

# Mask out examples (rows) w/ NaN's in training set
train = train[~np.isnan(train).any(axis=1)]
test = test[~np.isnan(test).any(axis=1)]

# Prepend constant offset term to X for model intercept
train = np.insert(train, 0, 1, axis=1)
test = np.insert(test, 0, 1, axis=1)

X = train[:, 0:3] # slice feature values (first n - 1 columns)
y = train[:, -1] # slice out labels (last column)
X_test = test

# Feature scaling - Every column gets scaled
N = X.shape[1]
for i in range(N):
    X[:, i] = (X[:, i] - np.mean(X[:, i])) / np.abs(np.max(X[:, i]))

In [4]:
"""
- Model definitions -
In array shape comments, m is num examples; n is num features + 1
"""
# A Numerically Stable Sigmoid
# "Z": ndarray shape (m, )
sigmoid = lambda Z : np.where(Z > 0., 
                              1. / (1. + np.exp(-Z)), 
                              np.exp(Z) / (1. + np.exp(Z)))

# Hypothesis: Decision boundary is a linear combination of feature
# values w/ learnable intercept, squished by sigmoid...
# "theta": ndarray shape (n, )
# "X": ndarray shape (m, n)
def h(theta, X):
    return sigmoid(X.dot(theta))

# Objective function to Optimize (not called): 
#     log-likelihood is convex ensuring convergence
#     L2 Regularized (avoid overfitting w/ large weights)
#
# "X": input feature values ndarray shape (m, n)
# "Y": observed labels from training data ndarray shape (m, )
# "theta": model parameter ndarray shape (m, n)
def J(X, Y, theta, tune):
    # p = degree of belief that our model assigns to positive class
    p = h(theta, X)
    # Unpack size of batch
    m, = Y.shape
    
    # Regularizer term
    L2_reg = (tune/(2.*m)) * np.dot(theta, theta)
    # Standard logistic regression cost function
    log_likelihood = np.sum(np.multiply(Y, np.log(p)) + (1. - Y) * np.log(1. - p)) / m
    
    # return cost over mini-batch        
    return (-log_likelihood + L2_reg)

def SGD_step(X_mini, y_mini, theta, alpha, tune):
    # Unpack size of mini batch
    m, = y_mini.shape
    
    # Calculate gradient using mini-batch
    err = h(theta, X_mini) - y_mini
    grad_J = (np.dot(X_mini.T, err) + tune * theta) / m
    
    # Compute theta_i+1
    theta_next = theta - alpha * grad_J
    theta_next[0] = theta_next[0] - alpha * np.mean(err)
    
    # return theta_i+1 and new cost (for convergence monitoring)
    return (theta_next, J(X_mini, y_mini, theta, tune))

In [5]:
"""
Data Exploration & Feature Engineering
"""
# Check for class imbalance
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(fit_intercept=True, verbose=10)
clf.fit(X[:, 1:3], y)

print(clf.intercept_, clf.coef_)
print ('Accuracy from sk-learn: {0}'.format(clf.score(X[:, 1:3], y)))

[LibLinear][-0.42390604] [[-3.13835483 -2.41928263]]
Accuracy from sk-learn: 0.7016806722689075


In [6]:
"""
Model Training and Evaluation
"""
# Hyperparameters of learning algorithm
alpha = 0.1 # learning rate

epochs = 300000 # num passes through full training data
iters_per_epoch = 1 # ratio of batch size to mini-batch size

tune = 0. # L2 regularization coefficient for overfitting control

# This yields 50/50 uncertainty in sigmoid on y
theta = np.zeros(shape=X.shape[-1], dtype=np.float64)


# Optimization Loop
for step in range(epochs):
    for iterate in range(iters_per_epoch):
        # mini-batch = full batch (non-stochastic)
        theta, cost = SGD_step(X, y, theta, alpha, tune)
        if step % 10000 == 0:
            print("cost ", step + 1, ": ", cost)

print("coefficients: ", theta)
y_hat = np.round(h(theta, X))
print("prediction accuracy: ", 1. - np.sum(np.abs(y_hat - y)/y.shape[0]))

cost  1 :  0.6931471805599453
cost  10001 :  0.599210815432797
cost  20001 :  0.5992060012730206
cost  30001 :  0.5992060003678475
cost  40001 :  0.599206000367676
cost  50001 :  0.5992060003676759
cost  60001 :  0.599206000367676
cost  70001 :  0.5992060003676759
cost  80001 :  0.5992060003676759
cost  90001 :  0.5992060003676759
cost  100001 :  0.5992060003676759
cost  110001 :  0.5992060003676759
cost  120001 :  0.5992060003676759
cost  130001 :  0.5992060003676759
cost  140001 :  0.5992060003676759
cost  150001 :  0.5992060003676759
cost  160001 :  0.5992060003676759
cost  170001 :  0.5992060003676759
cost  180001 :  0.5992060003676759
cost  190001 :  0.5992060003676759
cost  200001 :  0.5992060003676759
cost  210001 :  0.5992060003676759
cost  220001 :  0.5992060003676759
cost  230001 :  0.5992060003676759
cost  240001 :  0.5992060003676759
cost  250001 :  0.5992060003676759
cost  260001 :  0.5992060003676759
cost  270001 :  0.5992060003676759
cost  280001 :  0.5992060003676759
co

#!/bin/python3

import math
import os
import random
import re
import sys



# Complete the findNumber function below.
def findNumber(arr, k):
    if k in arr:
        return "YES"
    else:
        return "NO"
 
            


if __name__ == '__main__':
    fptr = open(os.environ['OUTPUT_PATH'], 'w')

    arr_count = int(input().strip())

    arr = []

    for _ in range(arr_count):
        arr_item = int(input().strip())
        arr.append(arr_item)

    k = int(input().strip())

    res = findNumber(arr, k)

    fptr.write(res + '\n')

    fptr.close()


In [9]:
id = np.identity(20) * 2
vec = np.ones(20) * 5

print (id, vec)
print(np.matmul(vec, id))

[[2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0.