In [1]:
import numpy as np
import pandas as pd

# Data preprocessing

In [2]:
# Load the weekly indoor sensor log (CSV file expected in the working directory).
indoorData = pd.read_csv(filepath_or_buffer="weeklyIndoorTemp.csv")

In [4]:
# Preview the first five rows to check the column layout.
indoorData.head(n=5)

Unnamed: 0,Date & Time,Humidity,Temperature,Fahrenheit,No of Occupant
0,[2022-11-18 08:54:00.274],52.1,20.3,68.54,0
1,[2022-11-18 08:54:01.287],52.1,20.3,68.54,0
2,[2022-11-18 08:54:02.287],52.1,20.3,68.54,0
3,[2022-11-18 08:54:03.294],52.1,20.3,68.54,0
4,[2022-11-18 08:54:04.297],52.1,20.3,68.54,0


Looking at the data, the `Date & Time` column is stored as bracketed text. We need to convert it to a numeric form (a Unix timestamp) so that it is easier to work with as a model feature.

In [9]:
import datetime


def parseBracketedDateTime(rawValue):
    """Parse a sensor-log timestamp such as "[2022-11-18 08:54:00.274] ".

    Surrounding whitespace and the enclosing square brackets are stripped
    before parsing, so the result does not depend on whether the raw string
    carries a trailing space (a fixed [1:-2] slice drops the last fractional
    digit when it does not).

    Parameters
    ----------
    rawValue : str
        ISO-8601 date/time wrapped in square brackets.

    Returns
    -------
    datetime.datetime
        Naive datetime (no timezone attached).

    Raises
    ------
    ValueError
        If the text between the brackets is not a valid ISO-8601 date/time.
    """
    return datetime.datetime.fromisoformat(rawValue.strip().strip("[]"))


inputDateTime = "[2022-11-18 08:54:00.274] "  # one sample of 'Date & Time'
inputDateTimeObject = parseBracketedDateTime(inputDateTime)  # convert to datetime object
# timestamp() treats a naive datetime as local time, so the value shown depends
# on the timezone of the machine running the notebook.
inputDateTimeObject.timestamp()  # convert to timestamp

1668758040.274

In [11]:
# same logic as above but for all rows in 'Date & Time' column
indoorData["Timestamp"] = indoorData["Date & Time"].apply(
    lambda x: datetime.datetime.fromisoformat(x[1:-2]).timestamp())

In [39]:
# Preview the frame again; it now carries the derived 'Timestamp' column.
indoorData.head(n=5)

Unnamed: 0,Date & Time,Humidity,Temperature,Fahrenheit,No of Occupant,Timestamp
0,[2022-11-18 08:54:00.274],52.1,20.3,68.54,0,1668758000.0
1,[2022-11-18 08:54:01.287],52.1,20.3,68.54,0,1668758000.0
2,[2022-11-18 08:54:02.287],52.1,20.3,68.54,0,1668758000.0
3,[2022-11-18 08:54:03.294],52.1,20.3,68.54,0,1668758000.0
4,[2022-11-18 08:54:04.297],52.1,20.3,68.54,0,1668758000.0


In [13]:
# construct data for machine learning
X = indoorData[["Humidity", "No of Occupant", "Timestamp"]]
y = indoorData["Temperature"]

In [23]:
# First five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,Humidity,No of Occupant,Timestamp
0,52.1,0,1668758000.0
1,52.1,0,1668758000.0
2,52.1,0,1668758000.0
3,52.1,0,1668758000.0
4,52.1,0,1668758000.0


In [24]:
# First five values of the target series.
y.head(n=5)

0    20.3
1    20.3
2    20.3
3    20.3
4    20.3
Name: Temperature, dtype: float64

# Machine Learning

### Temperature prediction using Linear Regression

In [15]:
from sklearn.linear_model import LinearRegression

In [28]:
# Fit ordinary least squares on plain NumPy arrays (no column names attached).
model = LinearRegression().fit(
    X=X.to_numpy(),
    y=y.to_numpy(),
)

In [32]:
# Predict for the first row only; the [:1] slice keeps the input two-dimensional.
model.predict(X.to_numpy()[:1])

array([18.80047538])

In [22]:
# R^2 of the linear model on the same rows it was fitted on.
# Pass NumPy arrays, matching how the model was fitted: scoring with the
# DataFrame makes scikit-learn warn that X has feature names the estimator
# was fitted without.
model.score(X.to_numpy(), y.to_numpy())

0.17875704337085307

### Temperature prediction using Decision Tree

In [33]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor

In [34]:
# Decision-tree regressor with a fixed random_state so reruns build the same tree.
regressor = DecisionTreeRegressor(random_state=0)
# 10-fold cross-validation; the displayed array holds one score per fold.
# NOTE(review): with an integer cv the folds are contiguous, unshuffled row
# ranges — for time-ordered data, confirm this is the intended evaluation.
cross_val_score(estimator=regressor, X=X.to_numpy(), y=y.to_numpy(), cv=10)

array([-1.59292984,  0.30210855,  0.84987323, -4.5083698 ,  0.3678542 ,
        0.33271314,  0.48445631, -1.77938961,  0.40669841, -2.96122142])

In [36]:
# Fit the tree on the full dataset (cross_val_score works on clones, so
# `regressor` itself is still unfitted before this cell).
regressor.fit(X=X.to_numpy(), y=y.to_numpy())

In [37]:
# Predict for the first row only; the [:1] slice keeps the input two-dimensional.
regressor.predict(X.to_numpy()[:1])

array([20.3])

In [38]:
# R^2 on the same rows the tree was fitted on (an in-sample score, so it is
# not comparable to the per-fold cross-validation scores above).
regressor.score(X=X.to_numpy(), y=y.to_numpy())

0.9990321125053702