In [1]:
import pickle
import numpy as np
import cv2
import sys
import os
import glob
from sklearn import svm
from scipy.stats import mode
from sklearn.ensemble import RandomForestClassifier
from tqdm import tqdm

In [2]:
path='./data/raw_frames/hocky/fi1_xvid'
pickle_name='video_summary.pkl'

In [3]:
file_path=os.path.join(path,pickle_name)

In [4]:
with open(file_path, 'rb') as f:
    data = pickle.load(f)

In [5]:
data

{'images_path': 'data/raw_frames/hocky/fi1_xvid',
 'name': 'fi1_xvid',
 'images_files': ['data/raw_frames/hocky/fi1_xvid/frame_0.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_2.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_4.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_6.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_8.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_10.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_12.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_14.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_16.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_18.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_20.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_22.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_24.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_26.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_28.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_30.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_32.jpg',
  'data/raw_frames/hocky/fi1_xvid/frame_34.jpg',
  'data/raw_frames/hocky/fi1_xvid/fr

In [6]:
data['images_files']

['data/raw_frames/hocky/fi1_xvid/frame_0.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_2.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_4.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_6.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_8.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_10.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_12.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_14.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_16.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_18.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_20.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_22.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_24.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_26.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_28.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_30.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_32.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_34.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_36.jpg',
 'data/raw_frames/hocky/fi1_xvid/frame_38.jpg']

In [7]:
featuresLimit = 15000

In [8]:
def featureExtraction( videoPath, actionName, type):


    # Set frame path, if jpeg directory doesn't exist , take images from video dir
    framePath = videoPath
    if os.path.exists( framePath + "/jpeg") :
        framePath += "/jpeg/"

    # Extract feature
#     imageFrames = getImageList(framePath)

    pickle_name='video_summary.pkl'
    file_path=os.path.join(videoPath,pickle_name)
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
        imageFrames=data['images_files']
    #print "DEBUG: Image Frames - ", imageFrames

    frameCount = 0
    frameIndex = 0

    # Feature List for a video
    videoFeatures  = []

    for iFrame in imageFrames:

        frameIndex += 1

        # Read Frame
        frame = cv2.imread(iFrame)
        gray=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)

        # HOG Descriptor , default value it takes window size= 64x128, block size= 16x16, block stride= 8x8, cell size= 8x8, bins= 9
        hogDescriptor = cv2.HOGDescriptor()

        # Returns histogram
        hist = hogDescriptor.compute(gray)

        #sortedHogDescriptor = hogDescriptor
        sortedHogHist = np.sort(hist, axis=None)

        keyFeatures = sortedHogHist[- featuresLimit : ]

        if type == "Trng":
            keyFeatures = np.insert(keyFeatures, 0, sportsActionTag[actionName])

        videoFeatures.append(keyFeatures)

        # Lowest number of frame available in a video
        if frameCount >= 23:
            break

        frameCount += 1


    return videoFeatures

In [9]:
def getImageList(imageDirectory):

    # Find different type of images
    rImages = glob.glob(imageDirectory + "/*.jpg")
    rImages +=  glob.glob(imageDirectory + "/*.jpeg")
    rImages +=  glob.glob(imageDirectory + "/*.png")

    return rImages

In [10]:
sportsActionTag = {
    'no_fight': 0,
    'fight':1
}

In [11]:
videoPath='./data/raw_frames/hocky/fi1_xvid'
sportsActionName='fight'

In [12]:
videoFeatures = featureExtraction(videoPath , sportsActionName, 'Trng')

In [13]:
len(videoFeatures)

20

In [14]:
print(videoFeatures[0])

[1.         0.62515384 0.62515384 ... 0.9804249  0.9804249  0.9804249 ]


In [15]:
def getListOfDir(path):
    # Read each sport action directory
    dirs  = os.listdir(path)

    sportsActionsCount = 0
    filtered_dir  = []
    # Remove . .. and hidden directory
    for dir in dirs:
        if not dir.startswith("."):
            filtered_dir.append(dir)

    return filtered_dir

In [16]:
sportsActionPath='./data/raw_frames/hocky'
sportsActionList = getListOfDir( sportsActionPath )

In [17]:
print(sportsActionList[:5])

['fi100_xvid', 'fi101_xvid', 'fi102_xvid', 'fi103_xvid', 'fi104_xvid']


In [18]:

firstActionFlag = 0
videoFeatures = []
videoCount=1
for sportsActionName in tqdm(sportsActionList):
#     if(videoCount==10):
#         break
    sportsActionDir = sportsActionPath + "/" + sportsActionName
    if(sportsActionName.startswith("fi")):
        sportsActionName='fight'
    elif(sportsActionName.startswith("no")):
        sportsActionName='no_fight'
        
    videoFeatures = featureExtraction(sportsActionDir , sportsActionName, 'Trng')
#     print(len(videoFeatures))
    # Put together all the videos
    if firstActionFlag == 0:
        sportsActionFeatures = videoFeatures
        firstActionFlag = 1
    else:
        sportsActionFeatures = np.concatenate( (sportsActionFeatures, videoFeatures), axis=0)

    videoCount += 1
    
    

100%|██████████| 1000/1000 [9:53:31<00:00,  5.27s/it]    


In [19]:
# np.random.shuffle(sportsActionFeatures)

In [20]:
# sportsActionFeatures.shape

In [21]:

# for featureAndLabel in tqdm(sportsActionFeatures):
#     label.append(int(featureAndLabel[0]))
#     feature.append((np.delete(featureAndLabel, 0)).tolist())
# print(len(label))

In [22]:
# print(label[-5:])
sportsActionNumber = len(sportsActionTag)

In [23]:
def evaluation( truth, predicted, categoryIndex ):

    # TP,FP,FN,TN indicate True Positive, False Positive, False Negative, True Negative respectively
    TP = 1
    FP = 1
    FN = 1
    TN = 1

    # Categories are Sports Action 1=>0, Sports Action 2=> 1, Sports Action 3=>2  etc..
    for fIndex in range(len(truth)):

         # Positive prediction for each feature
        if ( int(predicted[fIndex]) == categoryIndex):
            # TP=> when P[i] = T[i] = Ci
            if (int(truth[fIndex]) == int (predicted[fIndex])):
                TP += 1
            else:
                FP += 1
        else: # Negative Prediction
            if ( int ( truth[fIndex]) == categoryIndex ):
                FN += 1
            else:
                TN += 1


    # Calculate Sensitivity - True Positive Rate - Recall
    sensitivity = TP / float ( TP + FN )

    # Specificity - True Negative Rate
    specificity = TN / float ( TN + FP )

    #Calculate accuracy
    accuracy =  ( TP + TN ) / float ( TP + FP + FN + TN )


    return sensitivity, specificity, accuracy

In [24]:
def getSportsActionName(saIndex):

    keys   = sportsActionTag.keys()

    for key in keys:
        if saIndex == sportsActionTag[key]:
            return key

In [25]:
def crossValidation( featureAndLabelList):

    # Randomize the sample
    np.random.shuffle(featureAndLabelList)


    # Evaluation Metrics
    sensitivity = 0.0
    specificity = 0.0
    accuracy    = 0.0


    # split feature set in equal subsets same as number of sports actions for cross validation
#     subsetLength =  len(featureAndLabelList) / sportsActionNumber
    subsetLength =  int(len(featureAndLabelList)*0.8)
    for rIndex in range(sportsActionNumber):

        print("INFO: Cross Validation Iteration - ", rIndex)
        trainigSet   = []
        valdationSet = []
        feature = []
        label   = []


        if ( rIndex == 0 ):
            trainigSet = featureAndLabelList[1*subsetLength:]
            valdationSet = featureAndLabelList[0: subsetLength]
        elif ( rIndex == (sportsActionNumber -1) ):
            trainigSet = featureAndLabelList[:(sportsActionNumber -1)*subsetLength]
            valdationSet = featureAndLabelList[(sportsActionNumber -1)*subsetLength : ]
        else:
            trainigSet = np.concatenate ((featureAndLabelList[:rIndex * subsetLength] , featureAndLabelList[(rIndex + 1) * subsetLength: ]), axis=0 )
            valdationSet = featureAndLabelList[rIndex * subsetLength : (rIndex + 1 ) * subsetLength]

        # Get all features in a array
        for featureAndLabel in trainigSet:
            label.append(int(featureAndLabel[0]))
            feature.append((np.delete(featureAndLabel, 0)).tolist())


        # Train model
        print("INFO: Training ... ")
        clf=RandomForestClassifier(n_estimators=13)
        clf=clf.fit(feature,label)

        # Prepare validation feature and label to be predicted
        print("INFO: Prediction for ", getSportsActionName(rIndex))
        vFeatureList = []
        vLabelList   = [] # Ground Truth
        for featureAndLabel in valdationSet:
            vFeatureList.append(featureAndLabel[1:].tolist())
            vLabelList.append(featureAndLabel[0])

        # Predict the class label for Validation Feature List
        predictedLabel = clf.predict(vFeatureList)

        # predict validation set and calculate accuracy
        print("INFO: Evaluating ... ")

        # Evaluation < Truth>, <Predicted>, <Sports Action Index>
        (sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , rIndex)

        sensitivity += sen
        specificity += spec
        accuracy    += accu

        print("\t   Sensitivity : ", sen)
        print("\t   Specificity : ", spec)
        print("\t   Accuracy    : ", accu)


    # Average evaluation metrics
    avgSensitivity = sensitivity / sportsActionNumber
    avgSpecificity = specificity / sportsActionNumber
    avgAccuracy = accuracy / sportsActionNumber


    print("  *** Overall Evaluation ***")
    print("    Average Sensitivity: ", avgSensitivity)
    print("    Average Specificity: ", avgSpecificity)
    print("    Average Accuracy   : ", avgAccuracy)


In [26]:
print(sportsActionFeatures.shape)

(20032, 15001)


In [27]:
# crossValidation(sportsActionFeatures)
featureAndLabelList=sportsActionFeatures
np.random.shuffle(featureAndLabelList)

In [28]:
subsetLength =  int(len(featureAndLabelList)*0.8)
trainigSet = featureAndLabelList[:subsetLength]
valdationSet = featureAndLabelList[subsetLength : ]

In [30]:
# Get all features in a array
label = []
feature = []
for featureAndLabel in trainigSet:
    label.append(int(featureAndLabel[0]))
    feature.append((np.delete(featureAndLabel, 0)).tolist())

In [31]:
# Train model
print("INFO: Training ... ")
clf=RandomForestClassifier(n_estimators=13)
clf=clf.fit(feature,label)

# Prepare validation feature and label to be predicted
# print("INFO: Prediction for ", getSportsActionName(rIndex))
vFeatureList = []
vLabelList   = [] # Ground Truth
for featureAndLabel in valdationSet:
    vFeatureList.append(featureAndLabel[1:].tolist())
    vLabelList.append(featureAndLabel[0])

INFO: Training ... 


In [32]:
# Predict the class label for Validation Feature List
predictedLabel = clf.predict(vFeatureList)

# predict validation set and calculate accuracy
print("INFO: Evaluating ... ")

# Evaluation < Truth>, <Predicted>, <Sports Action Index>
(sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , 0)
(sen1, spec1 , accu1 ) = evaluation(vLabelList , predictedLabel.tolist() , 1)

INFO: Evaluating ... 


In [33]:
# Evaluation Metrics
sensitivity = 0.0
specificity = 0.0
accuracy    = 0.0

sensitivity += sen
specificity += spec
accuracy    += accu

sensitivity += sen1
specificity += spec1
accuracy    += accu1
# print("\t   Sensitivity : ", sen)
# print("\t   Specificity : ", spec)
# print("\t   Accuracy    : ", accu)
# Average evaluation metrics
avgSensitivity = sensitivity / sportsActionNumber
avgSpecificity = specificity / sportsActionNumber
avgAccuracy = accuracy / sportsActionNumber


print("  *** Overall Evaluation ***")
print("    Average Sensitivity: ", avgSensitivity)
print("    Average Specificity: ", avgSpecificity)
print("    Average Accuracy   : ", avgAccuracy)

  *** Overall Evaluation ***
    Average Sensitivity:  0.7569041874262317
    Average Specificity:  0.7569041874262317
    Average Accuracy   :  0.7571677885814011


In [None]:
from sklearn import svm

In [35]:
print("INFO: Training ... ")
clf = svm.SVC(gamma=0.01, C=13)
clf.fit(feature,label)

INFO: Training ... 


SVC(C=13, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [36]:
# Predict the class label for Validation Feature List
predictedLabel = clf.predict(vFeatureList)

# predict validation set and calculate accuracy
print("INFO: Evaluating ... ")

# Evaluation < Truth>, <Predicted>, <Sports Action Index>
(sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , 0)
(sen1, spec1 , accu1 ) = evaluation(vLabelList , predictedLabel.tolist() , 1)

INFO: Evaluating ... 


In [37]:
# Evaluation Metrics
sensitivity = 0.0
specificity = 0.0
accuracy    = 0.0

sensitivity += sen
specificity += spec
accuracy    += accu

sensitivity += sen1
specificity += spec1
accuracy    += accu1
# print("\t   Sensitivity : ", sen)
# print("\t   Specificity : ", spec)
# print("\t   Accuracy    : ", accu)
# Average evaluation metrics
avgSensitivity = sensitivity / sportsActionNumber
avgSpecificity = specificity / sportsActionNumber
avgAccuracy = accuracy / sportsActionNumber


print("  *** Overall Evaluation ***")
print("    Average Sensitivity: ", avgSensitivity)
print("    Average Specificity: ", avgSpecificity)
print("    Average Accuracy   : ", avgAccuracy)

  *** Overall Evaluation ***
    Average Sensitivity:  0.7186830863075654
    Average Specificity:  0.7186830863075654
    Average Accuracy   :  0.7190226876090751
