# ECE 204: Assessment 3 (Fall 2022)

In [10]:
# Import Statements
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV


## Item 3: Classification

Please add cells below as necessary to answer the questions in this item. Do not change the pre-set random seeds.

SyntaxError: unmatched ')' (1511991004.py, line 23)

In [7]:
# Read in Data
# Load the training and testing CSVs from the notebook's working directory.
df_train = pd.read_csv('sensors_train.csv')
df_test = pd.read_csv('sensors_test.csv')

# Single list of feature columns so the train and test matrices are always
# built from the same columns in the same order.
SENSOR_COLUMNS = ['Sensor_1', 'Sensor_2', 'Sensor_3', 'Sensor_4']

# Features are the four sensor readings; the label is the 'Status' column.
X_train = df_train.loc[:, SENSOR_COLUMNS]
y_train = df_train['Status']

X_test = df_test.loc[:, SENSOR_COLUMNS]
y_test = df_test['Status']

# Preview the training data. A bare expression is only rendered when it is
# the last statement of the cell, so the preview lives at the end.
df_train.head()

In [9]:
# Find training and testing accuracies for a given max depth
# (random_state is the pre-set seed and must stay at 42).
clf = DecisionTreeClassifier(max_depth=15, random_state=42)
clf.fit(X_train, y_train)

# Predict on the training and testing data and compute accuracy (in percent)
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)
training_accuracy = accuracy_score(y_train, y_train_pred) * 100
testing_accuracy = accuracy_score(y_test, y_test_pred) * 100

# Feature importances
# Pair every importance with the column it belongs to. The names are taken
# from X_train itself (the frame the tree was fitted on) instead of a second
# hand-typed list, so the pairing cannot drift if the feature set changes.
feature_importances = clf.feature_importances_
sensors_in_order_of_importance = sorted(
    zip(X_train.columns, feature_importances),
    key=lambda pair: pair[1],  # sort on the importance value
    reverse=True,              # most important first
)

# Printing answers
print(f"Training Accuracy: {training_accuracy:.2f}%")
print(f"Testing Accuracy: {testing_accuracy:.2f}%")
print("Sensors in order of importance:", [sensor for sensor, _ in sensors_in_order_of_importance])


Training Accuracy: 88.62%
Testing Accuracy: 61.25%
Sensors in order of importance: ['Sensor_1', 'Sensor_3', 'Sensor_4', 'Sensor_2']


In [12]:
# Model selection via cross validation

# Candidate model and the depths to search (1 through 15 inclusive).
clf = DecisionTreeClassifier(random_state=42)
param_grid = {'max_depth': list(range(1, 16))}

# Initialize the GridSearchCV object with 5-fold cross-validation
cv_clf = GridSearchCV(clf, param_grid, cv=5)
cv_clf.fit(X_train, y_train)

# Depth with the best mean cross-validated score.
optimal_max_depth = cv_clf.best_params_['max_depth']

# Retrain the classifier using the optimal max_depth
clf_optimal = DecisionTreeClassifier(max_depth=optimal_max_depth, random_state=42)
clf_optimal.fit(X_train, y_train)

# Predict on the test data with the optimally trained classifier
y_test_pred_optimal = clf_optimal.predict(X_test)
testing_accuracy_optimal = accuracy_score(y_test, y_test_pred_optimal) * 100  # Convert to percent
print(f"Optimal max_depth: {optimal_max_depth}")
print(f"Testing Accuracy with Optimal max_depth: {testing_accuracy_optimal:.2f}%")

# Predict the status if all four sensors are measuring 50
# The tree was fitted on a DataFrame, so the query is built as a one-row
# DataFrame with the same columns. Passing a bare ndarray makes scikit-learn
# warn that X does not have valid feature names.
query_all_50 = pd.DataFrame([[50, 50, 50, 50]], columns=X_train.columns)
predicted_status = clf_optimal.predict(query_all_50)
print(f"Predicted status when all sensors measure 50: {predicted_status[0]}")

feature_importances = clf_optimal.feature_importances_

# Determine which sensors have a non-zero importance
# (a feature the tree never splits on has importance exactly 0).
# Names come from X_train so they line up with feature_importances_.
sensors = list(X_train.columns)
used_sensors = [sensor for sensor, importance in zip(sensors, feature_importances) if importance > 0]

print("Sensors used by the classifier:", used_sensors)


Optimal max_depth: 2
Testing Accuracy with Optimal max_depth: 71.25%
Predicted status when all sensors measure 50: Green
Sensors used by the classifier: ['Sensor_1', 'Sensor_3']




In [None]:
# Retrain classifier with optimal max_depth
# The starter placeholder (`??`) is not valid Python and stops a full
# Restart & Run All; use the depth selected by the grid search
# (`optimal_max_depth`) and actually fit the model on the training data.
clf = DecisionTreeClassifier(max_depth=optimal_max_depth, random_state=42)
clf.fit(X_train, y_train)


## Item 4: Linear Regression

Please add/delete cells as necessary below to answer the questions in this item.

In [13]:
# Read in Data
# Item 4 reloads the CSVs so this section can be run on its own.
df_train = pd.read_csv('sensors_train.csv')
df_test = pd.read_csv('sensors_test.csv')

# Regression target: the Sensor_4 reading.
yr_train = df_train['Sensor_4']
# Regression inputs: the readings of the other three sensors.
Xr_train = df_train[['Sensor_1', 'Sensor_2', 'Sensor_3']]

In [None]:
As a cost-cutting measure, you are tasked with creating a model that estimates the measurements of sensor 4 using the measurements of the other three sensors. That is, we would like to create the following model: 
Sensor_4 = B0 + B1 * Sensor_1 + B2 * Sensor_2 + B3 * Sensor_3

For this task, use Xr_train and yr_train given in the starter notebook.

a.) Enter B0 (round to the nearest integer).

Question 2
b.) Enter B1 (round to the nearest integer).

Question 3
c.) Enter B2 (round to the nearest integer).

Question 4
d.) Enter B3 (round to the nearest integer).
Question 5
e.) Enter R-squared (computed on the same dataset used to fit the model) as a number between 0 and 1, rounded to two decimal places.


In [14]:
# Fit the model  Sensor_4 = B0 + B1*Sensor_1 + B2*Sensor_2 + B3*Sensor_3.
# LinearRegression is already imported in the notebook's import cell, so it
# is not re-imported here (all imports live in one place at the top).
lr = LinearRegression()
# Fit the model to the training data
lr.fit(Xr_train, yr_train)

# The questions ask for integer answers: B0 is the intercept and B1..B3 are
# the coefficients, in the column order of Xr_train.
B0 = round(lr.intercept_)
B1, B2, B3 = [round(coef) for coef in lr.coef_]

# R-squared evaluated on the same data the model was fitted on.
R_squared = lr.score(Xr_train, yr_train)

print(f"B0 (Intercept): {B0}")
print(f"B1 (Sensor_1 coefficient): {B1}")
print(f"B2 (Sensor_2 coefficient): {B2}")
print(f"B3 (Sensor_3 coefficient): {B3}")
print(f"R-squared: {R_squared:.2f}")


B0 (Intercept): 4
B1 (Sensor_1 coefficient): 2
B2 (Sensor_2 coefficient): -1
B3 (Sensor_3 coefficient): 3
R-squared: 0.96
