# Q1: Time Average

In [1]:
from datetime import datetime as dt
from cmath import rect, phase
from math import radians, degrees
import numpy as np
times = ['12:30:30AM\n', '08:00:30AM\n', '09:00:30am\n', '12:30:30pm\n', '04:00:30PM']
cleaned_times = [dt.strptime(i.upper()[:10],'%I:%M:%S%p').strftime('%H:%M:%S') for i in times]
def meanAngle(deg):
    complexDegree = sum(rect(1, radians(d)) for d in deg) / len(deg)
    argument = phase(complexDegree)
    meanAngle = degrees(argument)
    return meanAngle

def meanTime(times):
    t = (time.split(':') for time in times)
    seconds = ((float(s) + int(m) * 60 + int(h) * 3600) 
               for h, m, s in t)
    day = 24 * 60 * 60
    toAngles = [s * 360. / day for s in seconds]
    meanAsAngle = meanAngle(toAngles)
    meanSeconds = meanAsAngle * day / 360.
    if meanSeconds < 0:
        meanSeconds += day
    h, m = divmod(meanSeconds, 3600)
    m, s = divmod(m, 60)
    ampm = "AM"
    if h > 12:
        h =- 12
        ampm = "PM"       
    return('%02i:%02i%s' % (h, m, ampm))

print(meanTime(cleaned_times))

10:30AM


# Q2: derivative braking

In [2]:
CSV_lines = ['0.5',
'0.0,0',
'0.5,1',
'1.0,2',
'1.5,5',
'2.0,4',
'2.5,4',
'3.0,3',
'3.5,2',
'4.0,2',
'4.5,2']

import pandas as pd

def check(lines_parsed):
    #get threshold for hard breaking
    check = float(lines_parsed[0][0])
    #recreate table
    df = pd.DataFrame(lines_parsed, columns = ['times', 'speeds'])[1:].astype(float)
    #create accelerations from speeds over time
    df['accelerations'] = df.diff(2)['speeds'].reset_index(drop=True)
    #check if acceleration is negative and above hard breaking threshold
    df['hard braking'] = df['accelerations'] < -check
    #create separate dataframe to check for hard breaking blocks
    df2 = df.applymap(lambda x: 1 if x else 0)
    #check when hard breaking starts and ends
    df2 = df2.diff()
    #count the number of instances of hard breaking
    print((df2['hard braking']==1).sum())
    
if __name__ == "__main__":
    lines_parsed: [list[list[float]]] = []
    for line in CSV_lines:
        parsed_line = line.strip('\n').split(',')
        lines_parsed.append(parsed_line)
    check(lines_parsed)

1


# Q3: Gradient Descent

In [3]:
import pandas as pd
premium = [421, 279, 311] 
room_count = [8, 4, 5] 
house_age = [33, 43, 42]
df = pd.DataFrame([premium, room_count, house_age]).T.astype(float)
df.columns = ['premium', 'room_count', 'house_age'] 
df['room_count2'] = df['room_count']**2 
room_count2 = df['room_count2'] 
house_age = df['house_age'] 
premium = df['premium']
room_count = df['room_count']
k1 = 1
k2 = 1
b = 1
d_k1_prev = 0
d_k2_prev = 0
d_b_prev = 0

alpha = 0.0001
threshold = 10**-6
epochs = 1

N = float(len(df))

while (True): 
    f = premium - (k1*room_count2 + k2*house_age + b)  # The current predicted value of Z
    d_k1 = alpha * (-4*k1/N) * room_count.dot(f).sum() # Derivative wrt k1...might be missing a chain rule
    d_k2 = alpha * (-2/N) * house_age.dot(f).sum() # Derivative wrt k2...might be missing a chain rule
    d_b = alpha * (-2/N) * (f).sum()  # Derivative wrt b
    if (abs(d_k1_prev - d_k1) < threshold) & (abs(d_k2_prev - d_k2) < threshold) & (abs(d_b_prev - d_b) < threshold):
        break
    d_k1_prev = d_k1
    d_k2_prev = d_k2
    d_b_prev = d_b
    k1 -= d_k1
    k2 -= d_k2
    b -= d_b
    epochs += 1
print ('k1 = {:.4f}\nk2 = {:.4f}\nb = {:.4f}\nn_iter = {:.0f}'.format(k1, k2, b, epochs))

k1 = 3.9900
k2 = 4.9893
b = 1.1126
n_iter = 163


In [4]:
k1 = 1
k2 = 1
b = 1
epochs = 156

for _ in range(epochs): 
    f = premium - (k1*room_count2 + k2*house_age + b)  # The current predicted value of Z
    k1 -= alpha * (-4*k1/N) * room_count.dot(f).sum() # Derivative wrt k1...might be missing a chain rule
    k2 -= alpha * (-2/N) * house_age.dot(f).sum() # Derivative wrt k2...might be missing a chain rule
    b -= alpha * (-2/N) * (f).sum()  # Derivative wrt c

print ('k1 = {:.4f}\nk2 = {:.4f}\nb = {:.4f}\nn_iter = {:.0f}'.format(k1, k2, b, epochs))

k1 = 3.9898
k2 = 4.9894
b = 1.1126
n_iter = 156


In [9]:
from sympy import symbols, diff
x, y, z = symbols('x y z', real=True)
a, b, c, n = symbols('a b c n', real=True)

f = 1/n * (z - (a*x**2 + b*y + c))**2
diff(f, x)

-4*a*x*(-a*x**2 - b*y - c + z)/n

In [10]:
diff(f, y)

-2*b*(-a*x**2 - b*y - c + z)/n

In [11]:
diff(f, z)

(-2*a*x**2 - 2*b*y - 2*c + 2*z)/n