In [130]:
import pandas

!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/graphing.py
!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/m0b_optimizer.py
!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/Data/seattleWeather_1948-2017.csv

# Load data
data = pandas.read_csv('seattleWeather_1948-2017.csv', parse_dates=['date'])

# Keep only January temperatures
data = data[[d.month == 1 for d in data.date]].copy()

data

--2023-01-27 10:00:47--  https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/graphing.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 21511 (21K) [text/plain]
Saving to: ‘graphing.py.6’


2023-01-27 10:00:47 (5.16 MB/s) - ‘graphing.py.6’ saved [21511/21511]

--2023-01-27 10:00:47--  https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/m0b_optimizer.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1287 (1.3K) [text/plain]
Saving to: ‘m0b_optimiz

Unnamed: 0,date,amount_of_precipitation,max_temperature,min_temperature,rain
0,1948-01-01,0.47,51,42,True
1,1948-01-02,0.59,45,36,True
2,1948-01-03,0.42,45,35,True
3,1948-01-04,0.31,45,34,True
4,1948-01-05,0.17,45,32,True
...,...,...,...,...,...
25229,2017-01-27,0.00,54,37,False
25230,2017-01-28,0.00,52,37,False
25231,2017-01-29,0.03,48,37,True
25232,2017-01-30,0.02,45,40,True


In [131]:
import graphing
import numpy as np

# Offset date into number of years since 1982.
# Calendar year plus the fraction of the year elapsed (day-of-year / 365.25),
# computed with the vectorised .dt accessor rather than a per-row Python loop.
data["years_since_1982"] = (data["date"].dt.year + data["date"].dt.dayofyear / 365.25) - 1982

# Scale and offset temperature so that it has a smaller range of values
# (subtract the mean, then divide by the standard deviation)
data["normalised_temperature"] = (data["min_temperature"] - np.mean(data["min_temperature"])) / np.std(data["min_temperature"])

graphing.scatter_2D(data, label_x="years_since_1982", label_y="normalised_temperature", title="January Temperatures (Normalised)")

In [132]:
class LinearRegressionModel:
  '''
  A straight-line model: prediction = input * slope + intercept.

  Both parameters are plain attributes so that training code can
  adjust them directly.
  '''

  def __init__(self):
    # Start from a flat line through the origin: every prediction is 0
    # until the parameters are changed.
    self.slope = 0
    self.intercept = 0

  def predict(self,date):
    '''
    Apply the current line to `date`.

    date: the input value(s) to evaluate the line at

    Returns: date * slope + intercept
    '''
    scaled = date * self.slope
    return scaled + self.intercept

# Create the single model instance used by the rest of the notebook.
# Its slope and intercept both start at 0.
model = LinearRegressionModel()

print("Model created")


Model created


In [133]:
# Report the untrained parameters: intercept first, then slope
print(f"Model parameters before training {model.intercept} {model.slope}")

print("Model before training")

# Plot the data, handing model.predict to the graphing helper as the trendline
graphing.scatter_2D(data,"years_since_1982", "normalised_temperature", trendline=model.predict)   

Model parameters before training 0 0
Model before training


In [134]:
def cost_function(actual_temperatures, estimated_temperatures):
    '''
    Sum-of-squared-errors cost.

    actual_temperatures: the observed values
    estimated_temperatures: the model's estimates for the same points

    Returns: a tuple (difference, cost) where difference is
    estimated - actual for each point and cost is the sum of the
    squared differences.
    '''
    residuals = estimated_temperatures - actual_temperatures
    squared_residuals = residuals ** 2
    return residuals, sum(squared_residuals)

In [135]:
from m0b_optimizer import MyOptimizer

# Create an optimizer (MyOptimizer comes from the m0b_optimizer helper module).
# The training step asks it, via get_parameter_updates, how much to change
# the model's intercept and slope.
optimizer = MyOptimizer()

In [136]:
def train_one_iteration(model_inputs, true_temperatures, last_cost:float):
    '''
    Run one training step on the notebook-level `model`, using the
    notebook-level `optimizer` and `cost_function`.

    model_inputs: one or more dates to provide the model
    true_temperatures: corresponding temperatures known to occur for the respective dates in model_inputs
    last_cost: the cost returned by the previous iteration (math.inf for the first one)

    Returns: a tuple (continue_training, cost).
      continue_training is True when this iteration's cost is strictly lower
      than last_cost, in which case the model parameters have been updated.
      It is False when the cost did not improve or is NaN, in which case the
      model is left untouched.
      cost is this iteration's cost (smaller = better).
    '''

    # estimate values for the inputs
    estimated_temperatures = model.predict(model_inputs)

    # calculate how well the model runs
    difference, cost = cost_function(true_temperatures, estimated_temperatures)

    # Deliberately "not cost < last_cost" rather than "cost >= last_cost":
    # every comparison with NaN is False, so the >= form would treat a NaN
    # cost as an improvement and the caller's loop would never terminate.
    if not cost < last_cost:
        return False, cost

    # The cost improved: ask the optimizer for the next parameter adjustments
    intercept_update, slope_update = optimizer.get_parameter_updates(model_inputs, cost, difference)
    model.intercept += intercept_update
    model.slope += slope_update

    return True, cost



In [137]:
import math

print("Training beginning...")

# Loop state: no previous cost yet, so any first cost counts as an improvement
i = 0
last_cost = math.inf
continue_loop = True

# The training columns never change between iterations, so look them up once
training_inputs = data["years_since_1982"]
training_targets = data["normalised_temperature"]
PRINT_EVERY = 400

# Keep stepping until train_one_iteration reports that training should stop
while continue_loop:
    step_result = train_one_iteration(model_inputs=training_inputs,
                                      true_temperatures=training_targets,
                                      last_cost=last_cost)
    continue_loop, last_cost = step_result

    # Print the status
    if i % PRINT_EVERY == 0:
        print(f"Iteration: {i}")

    i += 1

print("Training complete!")
print(f"Model parameters after training:\t{model.intercept:.8f},\t{model.slope:.8f}")
graphing.scatter_2D(data, "years_since_1982", "normalised_temperature", trendline=model.predict)




Training beginning...
Iteration: 0
Iteration: 400
Iteration: 800
Iteration: 1200
Iteration: 1600
Iteration: 2000
Iteration: 2400
Iteration: 2800
Iteration: 3200
Iteration: 3600
Iteration: 4000
Training complete!
Model parameters after training:	-0.00648853,	0.01193327
