# Classification
### Target: Magnitude Discrete

### Task:
- Load the dataset
- Split the data into X (attributes) and y (target)
- Split the dataset into train and test sets
- Train the models
- Predict the target attribute
- Calculate prediction scores


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import graphviz
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Read the dataset from disk, using the CSV's first column as the row index.
df = pd.read_csv('final_discrete.csv', index_col=0)

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,Latitude,Longitude,Depth,Magnitude Type,Root Mean Square,Source,Location Source,Magnitude Source,Magnitude Discrete,date_parsed
0,19.246,145.616,0.189274,5,0.317635,3,19,10,Big,-157630542.0
1,1.863,127.352,0.115675,5,0.317635,3,19,10,Medium,-157465811.0
2,-20.579,-173.972,0.030096,5,0.317635,3,19,10,Big,-157355642.0
3,-59.076,-23.557,0.022964,5,0.317635,3,19,10,Medium,-157093817.0
4,11.938,126.427,0.022964,5,0.317635,3,19,10,Medium,-157026430.0


## X attributes

In [4]:
# Feature matrix: every column except the target label.
# `columns=` replaces the positional axis argument (`df.drop(label, 1)`), which
# pandas deprecated in 1.3 and rejects with a TypeError from 2.0 onwards.
# NOTE(review): the rendered preview has an 'Unnamed: 0' header — confirm it is
# the row index and not a leftover counter column being used as a feature.
X = df.drop(columns='Magnitude Discrete')
X.head()

Unnamed: 0,Latitude,Longitude,Depth,Magnitude Type,Root Mean Square,Source,Location Source,Magnitude Source,date_parsed
0,19.246,145.616,0.189274,5,0.317635,3,19,10,-157630542.0
1,1.863,127.352,0.115675,5,0.317635,3,19,10,-157465811.0
2,-20.579,-173.972,0.030096,5,0.317635,3,19,10,-157355642.0
3,-59.076,-23.557,0.022964,5,0.317635,3,19,10,-157093817.0
4,11.938,126.427,0.022964,5,0.317635,3,19,10,-157026430.0


## Target attribute

In [5]:
# Target vector: the class label of each row (the preview shows values such as
# 'Big' and 'Medium').
y = df.loc[:, 'Magnitude Discrete']
y.head(n=5)

0       Big
1    Medium
2       Big
3    Medium
4    Medium
Name: Magnitude Discrete, dtype: object

## Split data into train & test datasets

In [6]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=8,
)

### Train Decision Tree Classifier

In [17]:
# Fit an entropy-criterion decision tree on the training split (seeded so the
# fitted tree is repeatable).
# NOTE(review): no depth or leaf-size limit is set, and the recorded scores are
# 100% on train vs ~60% on test — the tree is overfitting; consider constraining
# max_depth / min_samples_leaf.
dt_model = DecisionTreeClassifier(random_state=8, criterion='entropy')
dt_model.fit(X=X_train, y=y_train)

DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=8,
            splitter='best')

In [18]:
# Decision tree: accuracy on the held-out test split, printed as a percentage.
predicted_y = dt_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predicted_y)
print("Test Accuracy: {}%".format(100 * accuracy))

Test Accuracy: 60.47482389773023%


In [19]:
# Decision tree: accuracy on the data it was trained on, for comparison with the
# test score above.
predicted_y = dt_model.predict(X_train)
accuracy = accuracy_score(y_true=y_train, y_pred=predicted_y)
print("Train Accuracy: {}%".format(100 * accuracy))

Train Accuracy: 100.0%


In [16]:
# Optional, currently disabled: export the fitted decision tree to Graphviz DOT
# text, render it to a file named 'earthquake', and display the graph inline.
# Uncomment the lines below to run it.
# NOTE(review): rendering presumably needs the Graphviz executables installed —
# confirm before enabling. In the visible cells these lines are the only use of
# the `tree` and `graphviz` imports.
# dot_data = tree.export_graphviz(dt_model, out_file=None)

# graph = graphviz.Source(dot_data)
# graph.render('earthquake')
# graph

### Train K-Nearest Neighbors Classifier

In [11]:
# Fit k-nearest neighbours with the library's default settings, then report
# accuracy on the held-out test split.
# NOTE(review): KNN is distance-based and the features are unscaled (the preview
# shows Depth below 1 next to date_parsed around -1.5e8), so date_parsed likely
# dominates the distance — consider standardising the features.
knn_model = KNeighborsClassifier().fit(X_train, y_train.values.ravel())
predicted_y = knn_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predicted_y)
print("Test Accuracy: {}%".format(100 * accuracy))

Test Accuracy: 54.89172971562745%


In [12]:
# KNN: accuracy on the training split, for comparison with the test score.
# The model is already fitted on exactly this data in the cell that creates
# `knn_model`, so the duplicate `fit` call that used to open this cell was
# redundant work and has been dropped (matching the decision-tree train-score cell).
predicted_y = knn_model.predict(X_train)
accuracy = accuracy_score(y_train, predicted_y)
print("Train Accuracy: {}%".format(accuracy * 100))

Train Accuracy: 71.03386236586776%


### Train Gaussian Naive Bayes

In [13]:
# Fit Gaussian Naive Bayes with the library's default settings, then report
# accuracy on the held-out test split.
gnb_model = GaussianNB().fit(X_train, y_train.values.ravel())
predicted_y = gnb_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predicted_y)
print("Test Accuracy: {}%".format(100 * accuracy))

Test Accuracy: 60.34437777198017%


In [14]:
# Gaussian Naive Bayes: accuracy on the training split, for comparison with the
# test score.
# The model is already fitted on exactly this data in the cell that creates
# `gnb_model`, so the duplicate `fit` call that used to open this cell was
# redundant work and has been dropped (matching the decision-tree train-score cell).
predicted_y = gnb_model.predict(X_train)
accuracy = accuracy_score(y_train, predicted_y)
print("Train Accuracy: {}%".format(accuracy * 100))

Train Accuracy: 60.31613442138405%
