In [56]:
# Import
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

In [57]:
# Load the training and test sets, discarding the columns that are not used
# further down in this notebook.
trainDropped = ['Resolution', 'Address', 'Descript', 'Dates']
testDropped = ['Address', 'Dates']
crime = pd.read_csv("data/Crime/crime_train.csv").drop(columns=trainDropped)
crimeTest = pd.read_csv("data/Crime/crime_test.csv").drop(columns=testDropped)
# Disabled: 'Dates' is dropped above, so these conversions cannot run as written.
### crime['Dates'] = pd.to_datetime(crime['Dates'], format='%Y-%m-%d %H:%M:%S')
### crimeTest['Dates'] = pd.to_datetime(crimeTest['Dates'], format='%Y-%m-%d %H:%M:%S')

In [58]:
# Encode the target first and keep its encoder (crimeLE): it is needed later to
# map the model's class indices back to the original category names.
if pd.api.types.is_numeric_dtype(crime['Category']):
    # Re-running this cell on already-encoded data would refit crimeLE on the
    # integer codes and silently lose the category names.
    raise RuntimeError(
        "'Category' is already encoded; re-run the data loading cell before this one."
    )
crimeLE = LabelEncoder()
crime['Category'] = crimeLE.fit_transform(crime['Category'])

# Encode every remaining text column with its own LabelEncoder.
for col in crime.columns.drop('Category'):
    # Covers both the classic object dtype and pandas' dedicated string dtype.
    isText = (pd.api.types.is_object_dtype(crime[col])
              or pd.api.types.is_string_dtype(crime[col]))
    if not isText:
        continue
    tempLE = LabelEncoder()
    if col in crimeTest.columns:
        # Fit on the union of train and test values so a label that occurs only
        # in the test set does not make transform() raise ValueError. When the
        # test labels are a subset of the train labels, the resulting codes are
        # identical to fitting on train alone (classes are the sorted uniques).
        tempLE.fit(pd.concat([crime[col], crimeTest[col]], ignore_index=True))
        crime[col] = tempLE.transform(crime[col])
        crimeTest[col] = tempLE.transform(crimeTest[col])
    else:
        crime[col] = tempLE.fit_transform(crime[col])

In [59]:
# Split the training frame into the feature matrix (every column except the
# target) and the target vector ('Category').
xTrain = crime.drop(columns='Category')
yTrain = crime['Category']

In [67]:
# Train a random forest of 200 trees; the fixed random_state seeds the
# estimator's randomness.
cl = RandomForestClassifier(
    n_estimators=200,
    random_state=0,
)
# fit() returns the estimator itself, so `rs` refers to the same fitted object as `cl`.
rs = cl.fit(X=xTrain, y=yTrain)

In [68]:
# Predict class probabilities for the test set; 'id' is an identifier, not a
# feature, so it is removed before calling the model.
testFeatures = crimeTest.drop(columns='id')
pr = rs.predict_proba(testFeatures)

In [69]:
# Display the predicted probability matrix returned by predict_proba
# (one row per test sample, one column per class).
pr

array([[0.00180471, 0.06930248, 0.00085336, ..., 0.00618044, 0.07134364,
        0.00499975],
       [0.        , 0.00252034, 0.        , ..., 0.00782634, 0.00348908,
        0.        ],
       [0.        , 0.05279253, 0.        , ..., 0.01674917, 0.10535749,
        0.04512882],
       ...,
       [0.        , 0.00071429, 0.        , ..., 0.10619367, 0.03759722,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.29988952, 0.        ,
        0.        ],
       [0.        , 0.08385805, 0.        , ..., 0.        , 0.08262816,
        0.        ]])

In [70]:
# Build the result table: one probability column per crime category, named by
# mapping the model's class indices back through the target encoder.
classNames = crimeLE.inverse_transform(rs.classes_)
ex = pd.DataFrame(pr, columns=classNames)
# Attach the test ids positionally (raw values, so index alignment is bypassed).
# NOTE(review): the id is written as float (e.g. 1.0) - confirm the consumer of
# this CSV expects that rather than an integer id.
ex['id'] = crimeTest['id'].values.astype(float)
ex.to_csv("crime_result_rf.csv", index=False)
ex

Unnamed: 0,ARSON,ASSAULT,BAD CHECKS,BRIBERY,BURGLARY,DISORDERLY CONDUCT,DRIVING UNDER THE INFLUENCE,DRUG/NARCOTIC,DRUNKENNESS,EMBEZZLEMENT,...,STOLEN PROPERTY,SUICIDE,SUSPICIOUS OCC,TREA,TRESPASS,VANDALISM,VEHICLE THEFT,WARRANTS,WEAPON LAWS,id
0,0.001805,0.069302,0.000853,0.000000,0.015615,0.001397,0.000869,0.032985,0.002584,0.002979,...,0.002705,0.000877,0.042950,0.0,0.006145,0.045226,0.006180,0.071344,0.005000,1.0
1,0.000000,0.002520,0.000000,0.000000,0.040667,0.000000,0.000000,0.000000,0.000000,0.002755,...,0.000000,0.000000,0.015111,0.0,0.000000,0.003813,0.007826,0.003489,0.000000,2.0
2,0.000000,0.052793,0.000000,0.000000,0.000000,0.008138,0.000000,0.245653,0.017619,0.000000,...,0.008006,0.000000,0.015832,0.0,0.000000,0.024942,0.016749,0.105357,0.045129,3.0
3,0.000000,0.061868,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.030433,0.032760,0.059370,0.069596,0.000000,4.0
4,0.000000,0.021667,0.000000,0.000000,0.007167,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.001250,0.022083,0.000000,0.000000,5.0
5,0.000000,0.119874,0.000000,0.000000,0.060705,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,6.0
6,0.000000,0.017583,0.000000,0.000000,0.046357,0.000000,0.000000,0.015000,0.000000,0.000000,...,0.000000,0.000000,0.031429,0.0,0.000000,0.000714,0.134972,0.000000,0.000000,7.0
7,0.000000,0.069106,0.000000,0.000000,0.000000,0.000000,0.000000,0.314751,0.010223,0.000000,...,0.000000,0.000000,0.019008,0.0,0.000000,0.018677,0.004802,0.111832,0.004720,8.0
8,0.001207,0.072518,0.000370,0.000424,0.014662,0.004759,0.000358,0.027707,0.003100,0.001139,...,0.005963,0.001079,0.050254,0.0,0.006332,0.036153,0.004621,0.079183,0.006118,9.0
9,0.000000,0.044575,0.000000,0.000000,0.000000,0.000000,0.000000,0.501615,0.016255,0.000000,...,0.014270,0.000000,0.028243,0.0,0.000000,0.000000,0.000000,0.146634,0.000000,10.0
