In [1]:
%load_ext snakeviz

In [7]:
%%snakeviz
# https://blog.finxter.com/python-cprofile-a-helpful-guide-with-prime-example/
import random

def guess():
    ''' Draws a random integer between 2 and 1000 (both ends included) '''
    # randrange excludes its upper bound, hence 1001 to keep 1000 reachable.
    candidate = random.randrange(2, 1001)
    return candidate

def is_prime(n):
    ''' Checks whether n is prime.

    Returns False for any n below 2 and for any n that can be written as
    i * j with both factors in range(2, n); returns True otherwise.
    '''
    # Without this guard the loops below are empty for n < 2, so 0, 1 and
    # negative numbers would be reported as prime.
    if n < 2:
        return False
    # NOTE(review): the exhaustive factor-pair scan is quadratic in n. It is
    # left untouched because this cell runs under %%snakeviz, presumably as a
    # deliberately slow profiling example -- confirm before optimizing.
    for i in range(2, n):
        for j in range(2, n):
            if i * j == n:
                return False
    return True

def find_primes(num):
    ''' Gathers num primes by repeatedly testing random guesses.

    Every accepted guess is appended as-is, so the returned list can
    contain the same prime more than once.
    '''
    found = []
    while True:
        # Stop as soon as enough primes have been collected (this also
        # covers num <= 0, where nothing is ever drawn).
        if len(found) >= num:
            return found
        candidate = guess()
        if is_prime(candidate):
            found.append(candidate)

# Collect 100 primes from random guesses and print the resulting list
# (find_primes does not de-duplicate, so repeats can appear).
print(find_primes(100))

[911, 613, 179, 827, 269, 251, 701, 163, 223, 659, 53, 71, 587, 491, 401, 631, 47, 887, 151, 947, 29, 647, 929, 743, 977, 751, 719, 31, 47, 317, 857, 953, 5, 131, 557, 173, 911, 107, 101, 3, 359, 911, 193, 997, 811, 857, 907, 331, 191, 601, 883, 587, 107, 751, 521, 197, 17, 521, 89, 379, 83, 457, 971, 163, 761, 983, 461, 433, 757, 127, 409, 929, 541, 223, 233, 751, 61, 101, 383, 193, 7, 877, 263, 457, 431, 641, 569, 379, 991, 383, 157, 587, 233, 107, 67, 911, 859, 853, 227, 487]
 
*** Profile stats marshalled to file '/var/folders/qn/3hj7mcx56k19b_09n6dymw8h0000gn/T/tmp33liesbq'. 
Embedding SnakeViz in this document...


In [18]:
%%snakeviz

import warnings
# From this point on, ignore FutureWarning and UserWarning messages
# (presumably to keep library warnings out of the cell output -- confirm).
warnings.simplefilter('ignore', FutureWarning)
warnings.simplefilter('ignore', UserWarning)

import zipfile

import numpy as np
import pandas as pd


def logodds(X, a):
    """Return the weighted sum of each row of X, using a as the weights.

    X has shape (n, m) and a has shape (m,); the result has shape (n,).
    """
    # a is broadcast from (m,) across every row of X, giving an (n, m) product.
    weighted = X * a
    # Collapse the feature axis so one value per row remains.
    return weighted.sum(axis=1)


def cross_entropy(X, Y, a):
    """Return the mean negative log-likelihood of labels Y under coefficients a.

    Z = logodds(X, a) gives one log-odds value per row. The per-row
    log-likelihood is -Z * (1 - Y) - log(1 + exp(-Z)), and the function
    returns minus the mean of those values.
    """
    Z = logodds(X, a)
    # np.logaddexp(0, -Z) evaluates log(exp(0) + exp(-Z)) = log(1 + exp(-Z))
    # without materialising exp(-Z), so large negative Z no longer overflows
    # to inf and turns the whole loss into inf.
    logliks = -Z * (1 - Y) - np.logaddexp(0, -Z)
    return -logliks.mean()


def expit(Z):
    """Apply the logistic function 1 / (1 + exp(-Z)) element-wise to Z."""
    denominator = np.exp(-Z) + 1
    return 1 / denominator


def gradient_descent(X, Y, a, η=0.01):
    """Perform a single gradient step and return the updated coefficients.

    The prediction error expit(logodds(X, a)) - Y is multiplied into each
    row of X, summed over rows and divided by the number of samples; the
    result, scaled by the step size η, is subtracted from a.
    """
    n_rows = Y.shape[0]
    predicted = expit(logodds(X, a))
    # Shape (n,) -> (n, 1) so the error broadcasts across the m columns of X.
    residual = (predicted - Y).reshape((n_rows, 1))
    grad = (X * residual).sum(axis=0) / n_rows  # -> (m,)
    assert grad.shape == a.shape, grad.shape
    step = η * grad
    return a - step


def logistic_model(X, Y, a=(1, 1, 1), iters=50000):
    """Fit coefficients by repeated gradient_descent steps and return them.

    Starting from a, the loop runs for t = 0 .. iters inclusive (iters + 1
    updates) and prints the step number, loss and coefficients roughly ten
    times over the run.
    """
    a = np.array(a)
    # iters // 10 is 0 whenever iters < 10, which made the modulo below raise
    # ZeroDivisionError; clamp the reporting interval to at least one step.
    report_every = max(1, iters // 10)
    for t in range(iters + 1):
        a = gradient_descent(X, Y, a)
        if t % report_every == 0:
            print("{}: loss={:.6f}, a={}".format(t, cross_entropy(X, Y, a), a))
    return a


def load_data(filename='../scripts/profiler/titanic.zip'):
    """Load the Titanic training table from a zip archive as numpy arrays.

    Parameters
    ----------
    filename : path of the zip archive; it must contain the member
        'data/titanic_train.csv'. Defaults to the path originally
        hard-coded in this function.

    Returns
    -------
    X : values of the 'Sex', 'Age' and 'Pclass' columns, with 'Sex'
        encoded as 1 for 'female' and 0 otherwise.
    Y : values of the 'Survived' column.

    Rows with a missing value in any of the four columns are dropped.
    """
    # Open the archive in a with-block so its file handle is always closed
    # (it was previously left open).
    with zipfile.ZipFile(filename) as titanic_zip:
        with titanic_zip.open('data/titanic_train.csv') as f:
            df = pd.read_csv(f)

    # .copy() gives an independent frame, so the column assignments below
    # do not write into a slice of the original table.
    df = df[['Sex', 'Age', 'Pclass', 'Survived']].copy()
    df['Sex'] = df['Sex'] == 'female' # convert to boolean
    df['Sex'] = df['Sex'].astype(int) # then convert to int
    df = df.dropna() # remove rows with "not a number" elements

    features = ['Sex', 'Age', 'Pclass']
    X = df[features].values
    Y = df['Survived'].values
    return X, Y


if __name__ == '__main__':
    # Load the feature matrix and labels, then fit the coefficients.
    X, Y = load_data()
    a = logistic_model(X, Y)

    # Exponentiating each fitted coefficient turns it into an odds ratio;
    # the heading and the array are printed on separate lines.
    print("Odds-ratios:", np.exp(a), sep="\n")

0: loss=16.424619, a=[0.99910465 0.81814023 0.98525564]
5000: loss=0.477268, a=[ 2.45084731 -0.00336839 -0.61851448]
10000: loss=0.476570, a=[ 2.61310454 -0.00345444 -0.6479314 ]
15000: loss=0.476554, a=[ 2.63827447 -0.00346145 -0.6525979 ]
20000: loss=0.476553, a=[ 2.64229717 -0.00346241 -0.65334631]
25000: loss=0.476553, a=[ 2.64294312 -0.00346256 -0.65346656]
30000: loss=0.476553, a=[ 2.64304692 -0.00346259 -0.65348588]
35000: loss=0.476553, a=[ 2.6430636  -0.00346259 -0.65348899]
40000: loss=0.476553, a=[ 2.64306628 -0.00346259 -0.65348949]
45000: loss=0.476553, a=[ 2.64306671 -0.00346259 -0.65348957]
50000: loss=0.476553, a=[ 2.64306678 -0.00346259 -0.65348958]
Odds-ratios:
[14.05624498  0.9965434   0.52022723]
 
*** Profile stats marshalled to file '/var/folders/qn/3hj7mcx56k19b_09n6dymw8h0000gn/T/tmp8rufy7dj'. 
Embedding SnakeViz in this document...
