In [None]:
# Import
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, log_loss

In [None]:
# Read & Preprocessing Data
# Load the training set and discard the columns that are not used as features.
# NOTE: 'Dates' is dropped here; to use it as a feature, remove it from the
# drop list and parse it with
# pd.to_datetime(crimeRaw['Dates'], format='%Y-%m-%d %H:%M:%S').
crimeRaw = pd.read_csv("data/Crime/crime_train.csv").drop(
    ['Resolution', 'Address', 'Descript', 'Dates'], axis=1
)

# crimeLE encodes the target; it is kept under its own name because a later
# cell calls crimeLE.inverse_transform / crimeLE.classes_ for the log-loss.
crimeLE = LabelEncoder()
crime = pd.DataFrame(index=crimeRaw.index)
crime['Category'] = crimeLE.fit_transform(crimeRaw['Category'])

# Build the feature columns. Non-numeric columns are label-encoded; numeric
# columns are passed through unchanged (previously they were silently left out
# of `crime`, so the model never saw them). The check uses is_numeric_dtype
# rather than comparing against "object", which misses string columns stored
# with pandas' dedicated string dtype. 'Category' is skipped because it was
# already encoded above with crimeLE.
for col in crimeRaw.columns.drop('Category'):
    if pd.api.types.is_numeric_dtype(crimeRaw[col]):
        crime[col] = crimeRaw[col]
    else:
        crime[col] = LabelEncoder().fit_transform(crimeRaw[col])

In [None]:
# Prepare Data for shorttime test
# Split the encoded frame into the feature matrix (everything except the
# target column) and the target vector, using the whole data set.
xTrain = crime.drop(columns=['Category'])
yTrain = crime['Category']

In [None]:
# Train
# Fit a 200-tree random forest on the prepared features; the fixed
# random_state makes repeated runs build the same forest.
cl = RandomForestClassifier(random_state=0, n_estimators=200)
cl.fit(xTrain, yTrain)
rs = cl  # fit() returns the estimator itself, so rs and cl are the same object

In [None]:
# Do Predict
# Score every row of `crime` with the fitted model: hard class predictions in
# `pr`, per-class probabilities in `pp`. The feature frame is built once and
# shared by both calls.
xAll = crime.drop(columns=['Category'])
pr = rs.predict(xAll)
pp = rs.predict_proba(xAll)

In [None]:
# Print accuracy
# Fraction of rows in `crime` whose predicted category matches the encoded target.
accuracy = accuracy_score(crime['Category'], pr)
print("Accuracy: ", accuracy)
# Results noted from earlier runs:
#   Without Address: 0.44 (Estimator=5), 0.45 (Estimator=10)
#   With Address:    0.46 (Estimator=5), 0.47 (Estimator=10)

In [None]:
# Print loss
# The true labels are converted back to category names, and the encoder's
# class names are passed as the label list for the probability columns.
# NOTE(review): assumes the columns of `pp` are in the same order as
# crimeLE.classes_ - confirm every category is present in the training target.
trueNames = crimeLE.inverse_transform(crime['Category'])
multiclassLoss = log_loss(trueNames, pp, labels=crimeLE.classes_)
print("Multiclass Loss: ", multiclassLoss)

In [None]:
# KFold for test
# Estimate out-of-sample accuracy with shuffled k-fold cross-validation and
# display the mean accuracy over the folds as the cell's output.
N_SPLITS = 10  # used for both the splitter and the score buffer so they cannot drift apart

scores = np.zeros(N_SPLITS)
cv = KFold(N_SPLITS, shuffle=True, random_state=0)
for i, (idxTrain, idxTest) in enumerate(cv.split(crime)):
    crimeTrain = crime.iloc[idxTrain]
    crimeTest = crime.iloc[idxTest]

    # Fold-local names are used on purpose: the earlier cells read the
    # notebook-level xTrain / yTrain / cl / rs / pr, and overwriting them here
    # would make those cells use a fold model (or fail on a length mismatch)
    # when re-run after this one.
    # NOTE(review): 20 trees / seed 2 here versus 200 trees / seed 0 for the
    # full-data model - presumably chosen for speed; confirm.
    foldModel = RandomForestClassifier(n_estimators=20, random_state=2)
    foldModel.fit(crimeTrain.drop('Category', axis=1), crimeTrain['Category'])

    # Score the held-out rows of this fold.
    foldPred = foldModel.predict(crimeTest.drop('Category', axis=1))
    scores[i] = accuracy_score(crimeTest['Category'], foldPred)

np.mean(scores)