# The objective of this hands-on is to ensure that the development environment has been properly set up 

In [1]:
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the required modules and libraries
import numpy as np
from pandas import read_csv
from matplotlib import pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
# Fix the seed of NumPy's global random generator for reproducibility.
# Calls that are given their own `random_state` argument (such as the
# train/test split later in this notebook) are controlled by that argument
# instead of this seed.
np.random.seed(7)

In [4]:
# Read the height / weight / gender records from the CSV file that sits
# next to this notebook.
dataraw = read_csv(filepath_or_buffer="heights_weights_genders.csv")

# Display five randomly drawn rows as a quick sanity check of the load.
dataraw.sample(n=5)

Unnamed: 0,Height(cm),Weight(kg),Gender
1977,177.0,89.4,Male
3880,164.5,77.1,Male
52,174.0,78.7,Male
2551,176.0,78.6,Male
2246,165.6,70.9,Male


In [None]:
# Plot histograms of the raw data's columns to inspect their distributions
dataraw.hist()
# Render the figure created by the call above
plt.show()

In [None]:
# Separate the dataset into features (X: every column except the last)
# and label (y: the last column)
dataset = dataraw.values
X, y = dataset[:, :-1], dataset[:, -1]

# Boolean row masks, one per class label
mpos = (y == 'Male')
fpos = (y == 'Female')

# Draw both classes on the same axes: feature column 0 on the x-axis and
# feature column 1 on the y-axis. The low alpha keeps dense regions readable.
for mask, colour, shape, name in ((mpos, 'b', 's', 'Male'),
                                  (fpos, 'r', 'o', 'Female')):
    plt.scatter(X[mask, 0], X[mask, 1], c=colour, marker=shape, alpha=0.05, label=name)

plt.xlabel('Height')
plt.ylabel('Weight')
plt.legend(loc='best')
plt.show()

In [None]:
# Hold out 20% of the rows as a test set; the remaining 80% are used for
# training. A fixed random_state makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the shape of each of the four resulting arrays
print(f'X_train dimension: {X_train.shape}')
print(f'X_test dimension: {X_test.shape}')
print(f'y_train dimension: {y_train.shape}')
print(f'y_test dimension: {y_test.shape}')

In [None]:
# Scaling the features to the range between 0 and 1
print(f'Original minimum and maximum X_train: {X_train.min()}, {X_train.max()}')

# Learn the per-feature min/max from the training split ONLY. Fitting on the
# full X would let statistics of the test rows leak into preprocessing, and
# would be overwritten by fit_transform() anyway, since fit_transform()
# re-fits the scaler from scratch.
scaler = MinMaxScaler(feature_range=(0, 1))
X2_train = scaler.fit_transform(X_train)

# Re-use the training-set min/max for the test split (transform only, no fit)
X2_test = scaler.transform(X_test)
print(f'Scaled minimum and maximum X_train: {X2_train.min()}, {X2_train.max()}')

In [None]:
# KNN classifier
# Train and evaluate on the min-max scaled features (X2_train / X2_test).
# k-NN is distance based, and user queries in this notebook are passed through
# `scaler.transform` before prediction, so the model has to be fitted in that
# same scaled feature space; fitting on the raw X_train would compare scaled
# queries against unscaled neighbours.
knn = KNeighborsClassifier()
knn.fit(X2_train, y_train)

# Predicted labels for the held-out rows
predictions = knn.predict(X2_test)

# score() returns the mean accuracy as a fraction; report it as a percentage
print(f'KNN accuracy: {100 * knn.score(X2_test, y_test):.3f}')

In [None]:
# Make prediction based on user input
# Parse the typed values with float() instead of eval(): eval() would execute
# any Python expression entered at the prompt. Non-numeric input now raises
# ValueError.
h = float(input('Please enter height (cm): '))
w = float(input('Please enter weight (kg): '))

# Build a single-row query in the same column order as the features
# (height, weight) and scale it with the fitted scaler.
# NOTE: the prediction is only meaningful if `knn` was fitted on features
# scaled by this same scaler.
q = scaler.transform(np.array([[h, w]]))

res1 = knn.predict(q)
print(f'kNN predicts: {res1}')