In [1]:
from os import listdir
from os.path import isfile, join
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import pickle

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
with open('drive/MyDrive/RealLifeViolencePickle/training_violence2.pickle', 'rb') as handle:
    allTrainingData = pickle.load(handle)

In [4]:
allTrainingData.shape

(21508, 3780)

In [5]:
with open('drive/MyDrive/RealLifeViolencePickle/labels_violence2.pickle', 'rb') as labels:
    listOfFrameLabels = pickle.load(labels)

In [6]:
framesTrain, framesTest, labelsTrain, labelsTest = train_test_split(allTrainingData, listOfFrameLabels, test_size= 0.30)
svcClassifier = LinearSVC(random_state=0)
svcClassifier.fit(framesTrain,labelsTrain)
labelPredict = svcClassifier.predict(framesTest)
print("---------------SVM---------------")
print(confusion_matrix(labelsTest, labelPredict))  
print(classification_report(labelsTest, labelPredict)) 
print('Total Accuracy: ',accuracy_score(labelsTest,labelPredict))

---------------SVM---------------
[[3608  420]
 [ 759 1666]]
              precision    recall  f1-score   support

           0       0.83      0.90      0.86      4028
           1       0.80      0.69      0.74      2425

    accuracy                           0.82      6453
   macro avg       0.81      0.79      0.80      6453
weighted avg       0.82      0.82      0.81      6453

Total Accuracy:  0.8172942817294282




In [7]:
print("---------------KNN---------------")
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(framesTrain, labelsTrain)
labelPredict = neigh.predict(framesTest)
print(confusion_matrix(labelsTest, labelPredict))
print(classification_report(labelsTest, labelPredict))
print('Total Accuracy: ',accuracy_score(labelsTest,labelPredict))

---------------KNN---------------
[[3415  613]
 [ 272 2153]]
              precision    recall  f1-score   support

           0       0.93      0.85      0.89      4028
           1       0.78      0.89      0.83      2425

    accuracy                           0.86      6453
   macro avg       0.85      0.87      0.86      6453
weighted avg       0.87      0.86      0.86      6453

Total Accuracy:  0.8628544862854486


In [8]:
print("---------------Logistic Regression---------------")
logreg = LogisticRegression()
logreg.fit(framesTrain, labelsTrain)
labelPredict = logreg.predict(framesTest)
print(confusion_matrix(labelsTest, labelPredict))
print(classification_report(labelsTest, labelPredict))
print('Total Accuracy: ',accuracy_score(labelsTest,labelPredict))

---------------Logistic Regression---------------
[[3509  519]
 [ 406 2019]]
              precision    recall  f1-score   support

           0       0.90      0.87      0.88      4028
           1       0.80      0.83      0.81      2425

    accuracy                           0.86      6453
   macro avg       0.85      0.85      0.85      6453
weighted avg       0.86      0.86      0.86      6453

Total Accuracy:  0.8566558189989152


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [9]:
print("---------------Decision Tree---------------")
clf_model = DecisionTreeClassifier(criterion="gini", random_state=42,max_depth=3, min_samples_leaf=5)   
clf_model.fit(framesTrain, labelsTrain)
labelPredict = clf_model.predict(framesTest)
print(confusion_matrix(labelsTest, labelPredict))
print(classification_report(labelsTest, labelPredict))
print('Total Accuracy: ',accuracy_score(labelsTest,labelPredict))

---------------Decision Tree---------------
[[3263  765]
 [ 123 2302]]
              precision    recall  f1-score   support

           0       0.96      0.81      0.88      4028
           1       0.75      0.95      0.84      2425

    accuracy                           0.86      6453
   macro avg       0.86      0.88      0.86      6453
weighted avg       0.88      0.86      0.86      6453

Total Accuracy:  0.8623895862389587


In [11]:
print("---------------LDA---------------")
clf = LinearDiscriminantAnalysis()
clf.fit(framesTrain, labelsTrain)
labelPredict = clf.predict(framesTest)
print(confusion_matrix(labelsTest, labelPredict))
print(classification_report(labelsTest, labelPredict))
print('Total Accuracy: ',accuracy_score(labelsTest,labelPredict))

---------------LDA---------------
[[3463  565]
 [ 224 2201]]
              precision    recall  f1-score   support

           0       0.94      0.86      0.90      4028
           1       0.80      0.91      0.85      2425

    accuracy                           0.88      6453
   macro avg       0.87      0.88      0.87      6453
weighted avg       0.89      0.88      0.88      6453

Total Accuracy:  0.8777312877731288


In [10]:
from sklearn.ensemble import GradientBoostingClassifier
gb_clf = GradientBoostingClassifier(n_estimators=20, learning_rate=0.1, max_features=2, max_depth=2, random_state=0)
gb_clf.fit(framesTrain, labelsTrain)
labelPredict = gb_clf.predict(framesTest)
print(confusion_matrix(labelsTest, labelPredict))
print(classification_report(labelsTest, labelPredict))
print('Total Accuracy: ',accuracy_score(labelsTest,labelPredict))

[[3300  728]
 [ 169 2256]]
              precision    recall  f1-score   support

           0       0.95      0.82      0.88      4028
           1       0.76      0.93      0.83      2425

    accuracy                           0.86      6453
   macro avg       0.85      0.87      0.86      6453
weighted avg       0.88      0.86      0.86      6453

Total Accuracy:  0.8609948860994886


In [12]:
from sklearn.ensemble import RandomForestClassifier
clf=RandomForestClassifier(n_estimators=100)
clf.fit(framesTrain, labelsTrain)
labelPredict = clf.predict(framesTest)
print(confusion_matrix(labelsTest, labelPredict))
print(classification_report(labelsTest, labelPredict))
print('Total Accuracy: ',accuracy_score(labelsTest,labelPredict))

[[3404  624]
 [  24 2401]]
              precision    recall  f1-score   support

           0       0.99      0.85      0.91      4028
           1       0.79      0.99      0.88      2425

    accuracy                           0.90      6453
   macro avg       0.89      0.92      0.90      6453
weighted avg       0.92      0.90      0.90      6453

Total Accuracy:  0.899581589958159
