# First Prediction on Teams Dataset

## Section: Import Data
This section imports the required libraries and loads the dataset.
The process involves:
- Import the pandas library
- Load the teams dataset

In [None]:
import pandas as pd

# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

# Load the raw teams table from the comma-separated source file.
teams_raw = pd.read_csv('basketballPlayoffs/teams.csv', sep=",")

## Section: Data Cleaning
In this section the data is cleaned. The process involves:
- Drop all columns that hold redundant or null values
- Drop all columns related to the post-season

In [None]:
# Columns this notebook treats as redundant or null.
redundant_or_null_columns = [
    'lgID', 'divID', 'seeded',
    'tmORB', 'tmDRB', 'tmTRB',
    'opptmORB', 'opptmDRB', 'opptmTRB',
]
# Columns related to the post season.
post_season_columns = ['rank', 'firstRound', 'semis', 'finals']

# Remove both groups from the raw table in a single pass.
teams = teams_raw.drop(columns=redundant_or_null_columns + post_season_columns)

# Preview the first rows of the cleaned table.
teams.head()
# Optional export of the cleaned table (left disabled):
#teams.to_csv('teams_final.csv', index=False)

## Section: Encoding
This section focuses on encoding the dataset using label encoding. The process involves:
- Import LabelEncoder from the sklearn.preprocessing module
- Apply label encoding to the non-numeric columns: tmID, franchID, confID, name, arena

In [None]:
from sklearn.preprocessing import LabelEncoder

# A single encoder is reused: fit_transform refits it on every column, so
# after the loop it only holds the mapping of the last column ('arena').
label_encoder = LabelEncoder()

# Non-numeric columns that are replaced in place by integer codes.
categorical_columns = ('tmID', 'franchID', 'confID', 'name', 'arena')
for column in categorical_columns:
    teams[column] = label_encoder.fit_transform(teams[column])

print(teams)

## Section: Decision Tree
This section focuses on applying a Decision Tree model. The process involves:
- Import train_test_split and DecisionTreeClassifier
- Split the data into features and the target variable
- Split the data into training and testing sets
- Create and train the Decision Tree model
- Prediction
- Show the results

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Split the data into features (X) and the target variable (y):
# every column except 'playoff' is a feature, 'playoff' is the label.
X = teams.drop('playoff', axis=1)
y = teams['playoff']

# Hold out 30% of the rows for testing; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create and train the Decision Tree model.
# The classifier is seeded as well: scikit-learn permutes the candidate
# features at every split, so without a fixed random_state the fitted tree
# (and the reported metrics) can change from one run to the next even though
# the train/test split is fixed.
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)


In [None]:
# Predict the target for the held-out rows with the fitted decision tree.
y_pred = decision_tree.predict(X_test)

# Compare the predictions with the true labels of the test set.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)
confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Show the accuracy first, then the per-class report and the confusion matrix.
print('Accuracy = ', accuracy)
for summary in (report, confusion):
    print(summary)

## Applying KNN
This section focuses on applying a K-Nearest Neighbors model. The process involves:
- Import train_test_split and KNeighborsClassifier
- Split the data into features and the target variable
- Split the data into training and testing sets
- Set the number of neighbors to 19
- Create and train the model
- Prediction
- Show the results

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Split the data into features (X) and the target variable (y)
X = teams.drop('playoff', axis=1)
y = teams['playoff']

# Split the data into training and testing sets.
# NOTE: this rebinds X_train/X_test/y_train/y_test with an 80/20 split,
# replacing the 70/30 split used by the Decision Tree cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# KNN classifies by distance between rows, so features measured on large
# scales would dominate the neighbour search if left raw. Standardising the
# features first puts them on a comparable scale. The scaler lives inside the
# pipeline so its statistics are learned from the training rows only and are
# then applied unchanged when predicting or scoring on the test rows.
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=19))

knn.fit(X_train, y_train)

print(knn.predict(X_test))
print('Accuracy: ', knn.score(X_test, y_test))