In [23]:
import numpy
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

seed = 7
numpy.random.seed(seed)



In [24]:
arrest_dataframe = pd.read_csv('BPD_Arrests_sanitized.csv')
print len(arrest_dataframe)
arrest_dataframe.head(2)

48278


Unnamed: 0.1,Unnamed: 0,Arrest,Age,Sex,Race,ArrestDate,ArrestTime,ArrestLocation,IncidentOffense,IncidentLocation,Charge,ChargeDescription,District,Neighborhood,ArrestDay,NormalizedArrestDate,ArrestHour
0,1,16147119.0,33.0,M,B,10/15/2016,23:00,100 W NORTH AVE,4CAGG. ASSLT.- OTH.,W NORTH AV & N CHARLES ST,1 1415,DOMESTIC ASSAULT,Central,Charles North,Saturday,2016-10-15,23
1,2,16147109.0,27.0,M,B,10/15/2016,22:45,2300 E JEFFERSON ST,4CAGG. ASSLT.- OTH.,2300 JEFFERSON ST,1 1420,ASSAULT,Southeastern,McElderry Park,Saturday,2016-10-15,22


In [25]:
print 'len(arrest_dataframe)', len(arrest_dataframe)
print 'Arrest :', len(arrest_dataframe['Arrest'].unique())
print 'Age', len(arrest_dataframe['Age'].unique())
print 'Sex', len(arrest_dataframe['Sex'].unique())
print 'Race', len(arrest_dataframe['Race'].unique())
print 'ArrestDate', len(arrest_dataframe['ArrestDate'].unique())
print 'ArrestTime', len(arrest_dataframe['ArrestTime'].unique())
print 'ArrestLocation [Many Unknowns]', len(arrest_dataframe['ArrestLocation'].unique())
print 'IncidentOffense [Many Unknowns]', len(arrest_dataframe['IncidentOffense'].unique())
print 'IncidentLocation [Many Unknowns]', len(arrest_dataframe['IncidentLocation'].unique())
print 'Charge', len(arrest_dataframe['Charge'].unique())
print 'ChargeDescription', len(arrest_dataframe['ChargeDescription'].unique())
print 'District', len(arrest_dataframe['District'].unique())
print 'Neighborhood', len(arrest_dataframe['Neighborhood'].unique())

len(arrest_dataframe) 48278
Arrest : 48078
Age 70
Sex 2
Race 5
ArrestDate 1374
ArrestTime 1672
ArrestLocation [Many Unknowns] 8327
IncidentOffense [Many Unknowns] 265
IncidentLocation [Many Unknowns] 11907
Charge 336
ChargeDescription 6901
District 10
Neighborhood 544


In [28]:
X = pd.concat([arrest_dataframe['Age'], pd.get_dummies(arrest_dataframe['Sex'], prefix = 'S'), pd.get_dummies(arrest_dataframe['Race'], prefix = 'R')], axis=1)
Y = arrest_dataframe['IncidentOffense']

print 'len(X) = ', len(X), 'len(Y) = ', len(Y)

X_cols = len(X.columns)
print X_cols
print X.columns

print len(Y.unique())
print Y.unique()

len(X) =  48278 len(Y) =  48278
8
Index([u'Age', u'S_F', u'S_M', u'R_A', u'R_B', u'R_I', u'R_U', u'R_W'], dtype='object')
265
['4CAGG. ASSLT.- OTH.' '4ECOMMON ASSAULT' '6CLARCENY- SHOPLIFTING'
 '49FAMILY DISTURBANCE' '7ASTOLEN AUTO' '4BAGG. ASSLT.- CUT'
 '54ARMED PERSON' '5ABURG. RES. (FORCE)' '3BROBB HIGHWAY (UA)'
 '118BURGLARY - FOURTH DEGREE' '4DAGG. ASSLT.- HAND' '4AAGG. ASSLT.- GUN'
 '3AFROBB HWY-FIREARM' '111PROTECTIVE ORDER' '119ISSUED IN ERROR'
 '4FASSAULT BY THREAT' '87NARCOTICS' '75DESTRUCT. OF PROPERTY' '79OTHER'
 '55APROSTITUTION' '77DOG BITE' '87ONARCOTICS (OUTSIDE)'
 '3AOROBB HWY-OTHER WPN' '6DLARCENY- FROM AUTO' '3KROBB RES. (UA)'
 '26RECOVERED VEHICLE' '5DBURG. OTH. (FORCE)' '5EBURG. OTH. (ATT.)'
 '81RECOVERED PROPERTY' '6JLARCENY- OTHER' '3JKROBB RESIDENCE-KNIFE'
 '2ARAPE (FORCE)' '23UNAUTHORIZED USE' '3CKROBB COMM-KNIFE'
 '87VNARCOTICS (ONVIEW)' '115TRESPASSING' '1AMURDER' '2BRAPE (ATTEMPT)'
 '88UNFOUNDED CALL' '3AJFROBB CARJACK-FIREARM' '24TOWED VEHICLE'
 '8AOARSON S

In [29]:
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

In [30]:
model = Sequential()

model.add(Dense(X_cols, input_dim=X_cols, init='uniform', activation='relu'))
model.add(Dense(X_cols, init='uniform', activation='relu'))
model.add(Dense(X_cols, init='uniform', activation='relu'))
model.add(Dense(len(Y.unique()), init='uniform', activation='sigmoid'))

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit model
model.fit(X.values, dummy_y, nb_epoch=50, batch_size=10, shuffle=True, verbose=1)

# Evaluate model
model.evaluate(X.values, dummy_y, batch_size=5, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

[3.0957273087000172, 0.266166784042421]