In [None]:
import pandas

!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/graphing.py
!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/m0b_optimizer.py
!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/Data/seattleWeather_1948-2017.csv

# Load the Seattle weather records; parse_dates turns the 'date' column into
# datetimes so that its calendar fields can be queried below.
data = pandas.read_csv('seattleWeather_1948-2017.csv', parse_dates=['date'])

# Keep only January temperatures. The vectorised .dt accessor builds the
# boolean mask for the whole column at once, rather than inspecting each
# Timestamp in a Python-level loop. .copy() gives an independent frame so
# that later column assignments do not write into a slice of the full table.
data = data[data["date"].dt.month == 1].copy()

data

In [None]:
import graphing
import numpy as np

# Offset date into a fractional number of years since 1982:
# calendar year plus (day of the year / 365.25), minus 1982.
# Computed with the vectorised .dt accessor instead of calling
# timetuple() on every Timestamp in a Python-level loop.
data["years_since_1982"] = (data["date"].dt.year + data["date"].dt.dayofyear / 365.25) - 1982

# Scale and offset temperature so that it has a smaller range of values:
# subtract the column mean, then divide by the column standard deviation.
data["normalised_temperature"] = (
    (data["min_temperature"] - np.mean(data["min_temperature"]))
    / np.std(data["min_temperature"])
)

graphing.scatter_2D(data, label_x="years_since_1982", label_y="normalised_temperature", title="January Temperatures (Normalised)")

In [None]:
class LinearRegressionModel:
    """Straight-line model: prediction = input * slope + intercept."""

    def __init__(self):
        # Both parameters start at zero; the training code adjusts them
        # by adding updates to these attributes.
        self.intercept = 0
        self.slope = 0

    def predict(self, date):
        """Return the model's estimate for `date`.

        `date` may be anything that supports multiplication by the slope
        and addition of the intercept (a number, or an array-like that
        supports element-wise arithmetic).
        """
        scaled = date * self.slope
        return scaled + self.intercept


# Module-level model instance used by the cells that follow.
model = LinearRegressionModel()

print("Model created")


In [None]:
# Report the untrained parameters: intercept first, then slope.
print(f"Model parameters before training {model.intercept} {model.slope}")

print("Model before training")

# Plot the data with the untrained model's predictions as the trendline.
graphing.scatter_2D(
    data,
    label_x="years_since_1982",
    label_y="normalised_temperature",
    trendline=model.predict,
)

In [None]:
def cost_function(actual_temperatures, estimated_temperatures):
    """Sum-of-squared-differences cost.

    actual_temperatures: observed values
    estimated_temperatures: values predicted by the model

    Returns: a tuple (difference, cost) where difference is
    estimated - actual and cost is the sum of the squared differences.
    """
    residuals = estimated_temperatures - actual_temperatures
    squared_residuals = residuals ** 2
    return residuals, sum(squared_residuals)

In [None]:
from m0b_optimizer import MyOptimizer

# Create an optimizer from the m0b_optimizer helper module. The training
# step calls its get_parameter_updates method to obtain the amounts added
# to the model's intercept and slope on each iteration.
optimizer = MyOptimizer()

In [None]:
def train_one_iteration(model_inputs, true_temperatures, last_cost:float):
    '''
    Runs one training step on the module-level `model`, using the
    module-level `cost_function` and `optimizer`.

    model_inputs: one or more dates to provide the model
    true_temperatures: corresponding temperatures known to occur for the respective dates in model_inputs
    last_cost: the cost returned by the previous iteration

    Returns: a tuple (continue_training, cost).
      continue_training is True when the cost is strictly smaller than
      last_cost (smaller = better); in that case the model's intercept and
      slope have been updated. It is False otherwise, and the model is left
      unchanged.
      cost is the cost computed in this iteration.
    '''

    # estimate values for the inputs
    estimated_temperatures = model.predict(model_inputs)

    # calculate how well the model runs
    difference, cost = cost_function(true_temperatures, estimated_temperatures)

    # Stop unless the cost strictly improved. Written as `not (cost < last_cost)`
    # rather than `cost >= last_cost` so that a NaN cost (for which every
    # comparison is False) also stops training instead of looping forever.
    if not (cost < last_cost):
        return False, cost

    # The cost improved: ask the optimizer for the parameter changes and apply them
    intercept_update, slope_update = optimizer.get_parameter_updates(model_inputs, cost, difference)
    model.intercept += intercept_update
    model.slope += slope_update

    return True, cost



In [None]:
import math

print("Training beginning....")

# Start from an infinite cost so that any finite first cost is an improvement.
last_cost = math.inf
i = 0

# Keep running training steps until one reports that training should stop.
while True:
    continue_loop, last_cost = train_one_iteration(
        model_inputs=data["years_since_1982"],
        true_temperatures=data["normalised_temperature"],
        last_cost=last_cost,
    )

    # Print the status every 400 iterations
    if i % 400 == 0:
        print(f"Iteration: {i}")

    i += 1

    if not continue_loop:
        break


print("Training complete!")
print(f"Model parameters after training:\t{model.intercept:.8f},\t{model.slope:.8f}")

# Plot the data again, now with the trained model's predictions as the trendline.
graphing.scatter_2D(
    data,
    label_x="years_since_1982",
    label_y="normalised_temperature",
    trendline=model.predict,
)


