In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt
import datetime
import random
# Using Skicit-learn to split data into training and testing sets
from sklearn.model_selection import train_test_split
# Import the model we are using
from sklearn.ensemble import RandomForestClassifier

#turns seconds into a string format ffmpeg uses
def seconds_formatter(sec):
    x = "0" + str(datetime.timedelta(seconds=sec))[:-3]
    return x
frame_rate = 119.88
def frame_to_time(frame):
    return round(frame/frame_rate, 3)
def time_to_frame(time):
    return int(round(time*frame_rate))

# Random Forest Classifier
identify reaches, successes, failures

### Functions for Creating Classifier

In [27]:
def get_dataset(df, start_frames, params):
    """ 
    For each frame in start frame, returns features surrounding it's place in the dataframe,
    as dictated by params.
  
    Parameters: 
    df (PANDAS dataframe): source of features
    start_frames (list of ints): list of frame numbers, each frame approx. being the start of a attempt/non-attempt
    params (parameter object): stores info like the number of frames to grab before, columns, test_fraction
      
    Returns: 
    numpy array: set of features according to start_frames and params
    """
    dataframe = df[params.cols]
    dataset = []
    for start_f in start_frames:
        #take splice of dataframe, starting at start_f, going frames_before before, frames_after after
        temp_df = dataframe.iloc[start_f - params.frames_before: start_f + params.frames_after]
        if len(temp_df.values.flatten()) == 0:
            raise ValueError("This frame is likely too close to the start/end:", start_f)
        dataset.append(temp_df.values.flatten())
    return np.asarray(dataset)
def get_nonattempt_times(num_times, blacklist, avoidance_radius):
    """
    Return random times where there are no attempts. Avoid times in blacklist (avoids returning features of an 
    attempt)
    
    Parameters:
    num_times (int): number of non-attempt-examples to return
    blacklist (sorted list of floats): list of time where there's an attempt (we want to avoid them)
    avoidance_radius (float): how long before and after an item in blacklist to avoid. This is crude 
    because features are not centered, etc
    
    Returns:
    list of floats: list of times where there is not an attempt. Could be time where mice is absent, pooping, 
    etc. Crude: does not ever return times after the last item in blacklist
    """
    nonattempts = []
    random.seed(3)
    while len(nonattempts) < num_times:
        #generate random time
        potential_nonattempt_time = random.randint(10, int(blacklist[-1]))
        
        #check is time is within avoidance radius
        too_close = False
        for start_time in blacklist:
            if abs(potential_nonattempt_time - start_time) < avoidance_radius:
                too_close = True
                break
        if not too_close:
            nonattempts.append(potential_nonattempt_time)
    return nonattempts

#Use training features and labels to create Random Forest Classifier
def create_classifier(features, labels, params):
    """
    Returns Random Forest Classifier, fitted using (potentially split) features and  labels, according to params
    
    Parameters:
    features (2d ndarray): array of features(also arrays), one for each attempt/nonattempt
    labels (list): list of labels(string), one for each attempt/nonattempt
    
    Returns:
    tuple , (RandomForestClassifier, list): first item in tuple is the classifier, second being a list of 
    wrong labels from test split (may be none)
    """
    #initiate RFC
    rf = RandomForestClassifier(n_estimators=params.n_trees, random_state=params.seed, max_depth=params.depth, max_features=params.max_feats)

    #if test_fraction is 0, just use feature/labels to train
    if params.test_fraction == 0:
        rf.fit(features, labels)
        return rf, None
    
    #split features/labels randomly, according to test_fraction
    train_feats, test_feats, train_labs, test_labs = train_test_split(features, labels, 
                                                                      test_size=params.test_fraction, 
                                                                      random_state=params.seed)
    print('Training Features Shape:', train_feats.shape, 'Training Labels Shape:', train_labs.shape)
    print('Testing Features Shape:', test_feats.shape, 'Testing Labels Shape:', test_labs.shape)
    rf.fit(train_feats, train_labs)
    
    # Use the forest's predict method on the test data
    predictions = rf.predict(test_feats)
    print(predictions, test_labs)
    
    #compile and return wrong predictions (second element in tuple)
    wrong_labels = []
    for i in range(len(predictions)):
        if predictions[i] != test_labs[i]:
            wrong_labels.append((predictions[i], test_labs[i]))
    print('Wrong Labels', wrong_labels)
    return rf, wrong_labels

In [34]:
?parameters

In [33]:
class parameters:
    """
    Container object for parameters. attributes are found Init signature
    Methods:
        copy(self): creates copy of self
    """
    def __init__(self, cols, frames_before, frames_after, test_fraction, step_size=1, n_trees=1000, seed=1, 
                 depth=None, max_feats='auto', verbose=False):
        self.cols = cols
        self.frames_before = frames_before
        self.frames_after = frames_after
        self.test_fraction=test_fraction
        self.step_size = step_size
        self.n_trees=n_trees
        self.seed=seed
        self.depth=depth
        self.max_feats=max_feats
        self.verbose=verbose
    def copy(self):
        """returns a copy of self, to avoid going back and forth on changes"""
        return parameters(self.cols, self.frames_before, self.frames_after, self.test_fraction, 
                        step_size=self.step_size, n_trees=self.n_trees, seed=self.seed, depth=self.depth, 
                        max_feats=self.max_feats)
    def __repr__(self):
        return str(self.__dict__)
class dlc:
    """
    Represents each annotated video, holding the dataframe, attempt/nonattempt times, frames, and classes
    Atrributes: check init signature
    
    Methods:
        get_features(self, params): returns features from dataframe according to attempt/nonattempt frames,
        according to params
        
        get_labels(self): returns labels from video according to attempt/nonattempt labels
        
        get_label_counts(self): return dictionary of each class and the number of occurences
    """
    def __init__(self, path_to_h5, attempt_times, attempt_labels, nonattempt_times=[], window=12):
        self.path = path_to_h5
        self.df = pd.read_hdf(path_to_h5)
        self.df.columns = self.df.columns.droplevel()
        self.attempt_times = attempt_times
        self.attempt_frames = [time_to_frame(x) for x in attempt_times]
        self.attempt_classes = attempt_labels
        self.nonattempt_times = nonattempt_times
        self.nonattempt_frames = [time_to_frame(x) for x in nonattempt_times]
        self.window = window
    def get_features(self, params):
        """
        get_features(self, params): returns features from dataframe according to attempt/nonattempt frames,
        according to params
        
        Returns:
        features (2d ndarray): set of features according to self.attempt/nonattempt_frames and params
        """
        return get_dataset(self.df, self.attempt_frames+self.nonattempt_frames, params)
    def get_labels(self):
        """get_labels(self): returns labels from dataframe according to attempt/nonattempt labels"""
        return np.asarray(self.attempt_classes + ['n' for x in self.nonattempt_frames])
    def get_label_counts(self):
        """get_label_counts(self): return dictionary of each class and the number of occurences"""

        label_counts = {'s': (self.attempt_classes + ['n' for x in self.nonattempt_frames]).count('s'),
                        'f': (self.attempt_classes + ['n' for x in self.nonattempt_frames]).count('f'), 
                        'r': (self.attempt_classes + ['n' for x in self.nonattempt_frames]).count('r'),
                        'n': (self.attempt_classes + ['n' for x in self.nonattempt_frames]).count('n')}
        return label_counts
    def __repr__(self):
        return self.path
class rfc:
    """
    Represents the DATA BEHIND a RFC
    Attributes: list of dlc objects, parameter object
    Methods:
        get_classifier(self, blacklist=[]): returns RFC trained on self.dlc_objects, except for those in 
        blacklist
        
        test_rfc(self, params, test_vids, iterations=10, blacklist=[]): Creates RFC from dlc_objects that are 
        not in test_vids or blacklist. Tests the RFC on test_vids, printing out results and returning a 
        ?dataframe detailing what was mis-classified
        
        get_all_times(self, blacklist=[]): returns all recorded attempt/nonattempt times in self.dlc_objects
        
        get_all_labels(self, blacklist=[]): returns all recorded attempt/nonattempt labels in self.dlc_objects
        
        get_label_counts(self, blacklist=[]): returns dictionary of each label it's number of occurrences,
        except those from an dlc object in blacklist

    """
    def __init__(self, dlc_objects, params):
        self.dlc_objects = dlc_objects
        self.params = params
    def get_classifier(self, blacklist=[]):
        #compile features and labels from all dlc_objects, then create classifier
        compiled_features = np.concatenate([x.get_features(self.params) for x in self.dlc_objects if x not in blacklist])
        compiled_labels = np.concatenate([x.get_labels() for x in self.dlc_objects if x not in blacklist])
        return create_classifier(compiled_features, compiled_labels, self.params)[0]
    def test_rfc(self, params, test_vids, iterations = 10, blacklist=[]):
        num_wrong, num_right_times, fp, fn = 0, 0, 0, 0
        guess_to_actual = []
        
        #test videos one by one
        for vid in test_vids:
            solutions = list(zip(vid.attempt_times, vid.attempt_classes))
            print("************************", vid, "****************************")
            print(solutions)
            for i in range(iterations):
                seed = random.randint(234, 1000)
                attempt_classifier = self.get_classifier(blacklist=test_vids + blacklist)
                print(attempt_classifier.n_features_, i)

                starts = get_classified_starts(attempt_classifier, vid.df, p)
                correct_times = find_differences(starts[:], solutions[:], 0.3, verbose=params.verbose)

                pred_label_for_correct_times = [x[1][0] for x in correct_times[0]]
                true_label_for_correct_times = [x[1] for x in correct_times[1]]
                
                num_right_times += len(correct_times[0])
                num_wrong += sum(np.asarray(pred_label_for_correct_times) != np.asarray(true_label_for_correct_times))
                print("number of times correct:", len(correct_times[0]), "out of", len(solutions), "(not label correctness)")
                print("number of labels correct:", len(correct_times[0]) - sum(np.asarray(pred_label_for_correct_times) != np.asarray(true_label_for_correct_times)), "out of the", len(correct_times[0]), "correct times")


                guess_to_actual.extend(list(zip(pred_label_for_correct_times, true_label_for_correct_times)))
                
            print(num_wrong/iterations)
            print(num_right_times/iterations)
            guess_to_actual_df = pd.DataFrame(guess_to_actual, columns = ['guess', 'actual'])
        print(guess_to_actual_df.groupby(guess_to_actual_df.columns.tolist(),as_index=False).size())
        return guess_to_actual
    def get_all_times(self, blacklist=[]):
        times = []
        for dlc in self.dlc_objects:
            if dlc in blacklist:
                continue
            times.extend(dlc.attempt_times)
            times.extend(dlc.nonattempt_times)
        return times
    def get_all_labels(self, blacklist=[]):
        labels = []
        for dlc in self.dlc_objects:
            if dlc in blacklist:
                continue
            labels.extend(dlc.attempt_classes)
            labels.extend(['n' for x in dlc.nonattempt_frames])
        return labels
    def get_label_counts(self, blacklist=[]):
        all_labels = self.get_all_labels(blacklist)
        label_counts = {'s': all_labels.count('s'),'f': all_labels.count('f'), 'r': all_labels.count('r'),
                        'n': all_labels.count('n')}
        return label_counts

In [21]:
p = parameters(['Nose', 'DomInside', 'Pellet', 'Index', 'OtherHand'], 0, 30, 0.01, step_size=1, 
               n_trees=100,seed=79, max_feats=150)

In [22]:
print(n3v3)
p.frames_after = 80
p.frames_before = 5
p.test_fraction = 0
p.step_size = 2
print(p)

../../gopro3lens-vj-2019-07-08/videos/2019.07.08_CC42970_N3_V3_2.7k_120FPS_Label_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5
{'cols': ['Nose', 'DomInside', 'Pellet', 'Index', 'OtherHand'], 'frames_before': 5, 'frames_after': 80, 'test_fraction': 0, 'step_size': 2, 'n_trees': 100, 'seed': 79, 'depth': None, 'max_feats': 150, 'verbose': False}


In [26]:
sample_rfc = rfc(lst, p)
for x in lst:
    sample_rfc.test_rfc(p, [x], iterations=1, blacklist=[])

************************ ../../gopro3lens-vj-2019-07-08/videos/gp2.7k3lensA1DeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5 ****************************
[(19.12, 'r'), (23.752, 'f'), (24.186, 'r'), (24.63, 'r'), (36.846, 's'), (43.652, 'f'), (44.042, 's'), (46.964, 'r'), (60.218, 's'), (62.897, 'r'), (64.787, 'r'), (72.045, 'r'), (85.174, 's'), (88.196, 'r'), (91.652, 's'), (93.248, 'r'), (93.814, 'r'), (106.294, 'r'), (106.867, 'f'), (111.344, 'r'), (112.249, 'r'), (113.407, 'r'), (115.474, 'f'), (115.918, 'r'), (129.119, 's'), (136.837, 'r'), (138.581, 'r')]
Training Features Shape: (677, 1275) Training Labels Shape: (677,)
Testing Features Shape: (0, 1275) Testing Labels Shape: (0,)
1275 0
(0.709, 'f')
false positives:  77
false negatives:  0
number of times correct: 27 out of 27 (not label correctness)
number of labels correct: 25 out of the 27 correct times
2.0
27.0
guess  actual
f      f          4
       r          2
r      r         15
s      s          6
dtype: int64
*******

Training Features Shape: (771, 1275) Training Labels Shape: (771,)
Testing Features Shape: (0, 1275) Testing Labels Shape: (0,)
1275 0
(0.976, 's')
false positives:  20
false negatives:  0
number of times correct: 14 out of 14 (not label correctness)
number of labels correct: 13 out of the 14 correct times
1.0
14.0
guess  actual
f      f         10
       s          1
s      s          3
dtype: int64
************************ ../../gopro3lens-vj-2019-07-08/videos/2019.07.16_CC42970_N3_V1_2.7k_120FPS_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5 ****************************
[(27.6, 's'), (32.8, 's'), (51.97, 's'), (55.98, 's'), (64.04, 'f'), (76.71, 's')]
Training Features Shape: (779, 1275) Training Labels Shape: (779,)
Testing Features Shape: (0, 1275) Testing Labels Shape: (0,)
1275 0
(3.962, 's')
false positives:  38
false negatives:  0
number of times correct: 6 out of 6 (not label correctness)
number of labels correct: 5 out of the 6 correct times
1.0
6.0
guess  actual
f   

### Training Data

In [20]:
lst = []
#"C:\Users\vjj14\Documents\GoProLens\3lens\gp2.7k3lensA1.mp4"
h5 = "../../gopro3lens-vj-2019-07-08/videos/gp2.7k3lensA1DeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [19.12, 23.752, 24.186, 24.630, 36.846, 43.652, 44.042, 46.964, 60.218, 62.897, 64.787, 72.045, 85.174, 88.196, 
           91.652, 93.248, 93.814, 106.294, 106.867, 111.344, 112.249, 113.407, 115.474, 115.918, 129.119, 136.837, 138.581]
attempt_labels = ['r', 'f', 'r', 'r', 's', 'f', 's', 'r', 's', 'r', 'r', 'r', 's', 'r', 's', 'r', 'r', 'r', 'f', 'r', 'r', 
                      'r', 'f', 'r', 's', 'r', 'r']
nonattempt_times = get_nonattempt_times(len(attempt_times)*3, attempt_times, 0.1)
lst.append(dlc(h5, attempt_times, attempt_labels, nonattempt_times=nonattempt_times))
A = lst[-1]

#"D:\DCIM\100GOPRO\2019.07.08_CC42970_N2_V3_2.7k_120FPS_Label_copy.mp4"
#includes all attempts
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.08_CC42970_N2_V3_2.7k_120FPS_Label_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [12.325, 16.568, 17.55, 18.912, 20.41, 22.1, 22.92, 24.699, 27.4, 29.754, 30.431, 31.39, 32.692, 37.731, 
           38.258, 42.86, 45.714, 56.45, 76.803, 77.86, 86.2, 86.75, 91.13, 91.649, 97.563, 98.1, 106.739, 107.292, 116.412, 
           121.49, 122.236, 124.775, 125.375, 134.324, 134.859, 142.37, 143.0, 150.0, 159.435, 160.04, 165.02, 
           165.633, 174.018, 177.129, 179.23, 183.719, 185.755, 192.041, 197.214, 198.66, 201.286, 203.34, 204.15, 206.253, 
           209.36, 213.027, 213.895, 217.676, 218.325, 218.982, 223.198, 226.56, 230.524, 233.61, 234.286, 235.838, 236.316, 
           237.091, 250.448, 251.206, 253.05]
attempt_labels = ['s', 'r', 'f', 'r', 'r', 'f', 'r', 'r', 'r', 'f', 'r', 'r', 'r', 'f', 'r', 'r', 's', 'r', 'f', 'r', 
                  'f', 'r', 'f', 'r', 'f', 'r', 'f', 'r', 'f', 'r', 'r', 'f', 'r', 'f', 'r', 'f', 'r', 'f', 'f', 'r', 
                  'f', 'r', 's', 'r', 's', 'f', 'r', 's', 'f', 'r', 'f', 'r', 'r', 's', 'r', 'f', 'r', 'r', 'f', 'r', 's', 
                  'r', 's', 'r', 'r', 'f', 'f', 'r', 'f', 'r', 'r']
nonattempt_times = get_nonattempt_times(len(attempt_times)*3, attempt_times, 0.1)
lst.append(dlc(h5, attempt_times, attempt_labels, nonattempt_times=nonattempt_times))
n2v3 = lst[-1]

#"D:\DCIM\100GOPRO\2019.07.08_CC42970_N3_V3_2.7k_120FPS_Label_copy.mp4"
h5 ="../../gopro3lens-vj-2019-07-08/videos/2019.07.08_CC42970_N3_V3_2.7k_120FPS_Label_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [float(x) for x in ['11.589', '17.755', '18.474', '20.810', '22.457', '25.491', '37.403', '41.884', '51.558', '57.359', 
                         '64.503', '78.54', '84.611', '96.276', '108.619', '117.017', '123.131', '130.131', '157.557', 
                         '164.342', '207.559']]
attempt_labels = ['s', 'f', 'r', 'r', 'r', 's', 'f', 's', 'f', 's', 'f', 'f', 's', 'f', 's', 's', 'f', 'f', 's', 
                          's', 'f']
nonattempt_times = [3.333, 126.886, 133.355, 205.494, 77.901, 94.947, 26.119, 73.896, 166.391, 51.277, 5.579, 16.697, 19.257, 
                     22.71, 28.645, 31.2]
lst.append(dlc(h5, attempt_times, attempt_labels, nonattempt_times=nonattempt_times))
n3v3 = lst[-1]

#"D:\DCIM\100GOPRO\2019.07.08_CC42973_N2INJURED_V3_2.7k_120FPS_Label.MP4"
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.08_CC42973_N2INJURED_V3_2.7k_120FPS_Label_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [float(x) for x in ['21.196', '66.492', '71.227', '88.349', '92.221', '99.692', '116.299', '127.703', '135.332', 
                                '142.429', '147.11', '151.609', '157.722', '163.4', '172.798', '183.103', '204.054', '208.735', '216.329']]
attempt_labels = ['f', 's', 'f', 'f', 'r', 'f', 'f', 's', 'f', 'f', 's', 's', 's', 's', 'f', 'f', 'f', 'f', 's']
nonattempt_times = [157.463, 38.734, 62.743, 66.176, 88.856, 94.266, 171.907, 179.534, 184.588, 207.388, 72.36, 86.4, 88.5, 90.172]
lst.append(dlc(h5, attempt_times, attempt_labels, nonattempt_times=nonattempt_times))
injuredn2v3 = lst[-1]

#"C:\Users\vjj14\Desktop\DeepLabCut\gopro3lens-vj-2019-07-08\videos\2019.07.08_CC42970_N2_V2_2.7k_120FPS_rf_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.08_CC42970_N2_V2_2.7k_120FPS_rf_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [float(x) for x in ['14.711', '32.655', '42.082', '42.718', '50.958', '53.474', '54.719', '55.383', '55.788', '57.26', '58.26', 
               '62.387', '62.965', '64.111', '70.942', '77.294', '77.84', '78.877', '80.16', '81.473', '82.164', '83.054', 
               '87.903', '93.567', '94.042', '94.689', '100.837', '101.711', '102.704', '111.962', '112.742', '118.758', 
               '127.66', '128.541', '129.396', '134.4', '138.618', '139.764', '140.528', '142.385', '142.960', '143.816', 
               '145.012', '149.907', '150.44', '151.57', '152.573', '170.386', '171.026', '176.764', '177.220', '179.808', 
               '192.836', '197.232', '198.342', '198.911', '199.796']]
attempt_labels = ['f', 'f', 'f', 'f', 'r', 'f', 'f', 'f', 'f', 'f', 'r', 'f', 'f', 'r', 'r', 'f', 'r', 'r', 'r', 'f', 'f', 
                     'r', 's', 'f', 'f', 'r', 'f', 'r', 'r', 'f', 'r', 'f', 'f', 'r', 'r', 's', 'r', 'r', 'r', 'f', 'f', 'r', 
                     'r', 'f', 'r', 'r', 'r', 'f', 'r', 'f', 'r', 'r', 's', 'r', 'f', 'f', 'r']
nonattempts_times = get_nonattempt_times(len(attempt_times)*3, attempt_times, 0.1)
lst.append(dlc(h5, attempt_times, attempt_labels, nonattempt_times=nonattempt_times))
n2v2 = lst[-1]

#Left handed
#??????
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.08_CC42970_N1_V2_2.7k_120FPS_rf_copy_flippedDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [float(x) for x in ['1.923', '2.850', '5.226', '5.620', '12.312', '20.540', '23.473', '24.856', '25.37', '29.487', 
        '33.058', '39.422','39.930', '40.528', '40.955', '52.772', '76.416', '89.048','115.462', 
        '125.123', '137.004', '160.532', '172.26', '220.833', '230.798', '240.640', '246.108','256.624',
        '270.936', '293.789', '303.168', '459.340', '510.407', '519.217', '519.513']]
attempt_labels = ['r', 'r', 'f', 's', 'r', 'r', 'r', 'r', 'r', 's', 'r', 'r', 'r', 'r', 'r', 'f', 'f', 's', 'f', 
         's', 's', 's', 'f', 'f', 's', 'f', 'r', 's', 'f', 'f', 'f', 's', 's', 'f', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
n1v2 = lst[-1]

#"C:\Users\vjj14\Desktop\DeepLabCut\gopro3lens-vj-2019-07-08\videos\2019.07.08_CC42973_N2INJURED_V1_2.7k_120FPS_rf_trim_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.08_CC42973_N2INJURED_V1_2.7k_120FPS_rf_trim_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times =  [float(x) for x in ['5.384', '5.929', '20.437', '24.076', '24.565', '34.586', '35.160', '44.708', '45.297', '49.574', 
        '54.857', '62.150', '62.78', '67.446', '67.734', '84.11', '93.654', '94.114', '94.849', '102.198', 
        '107.123', '111.213', '119.381', '132.425', '140.596', '147.813', '161.0']]
attempt_labels = ['f', 'r', 'r', 'f', 'r', 'f', 'r', 'f', 'r', 'f', 's', 'f', 'f', 'f', 'r', 'f', 'f', 'f', 'r', 'f', 
         's', 'f', 'f', 's', 'f', 'f', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
injuredn2v1 = lst[-1]

#??????"C:\Users\vjj14\Desktop\DeepLabCut\gopro3lens-vj-2019-07-08\videos\2019.07.08_CC42973_N2INJURED_V4_2.7k_120FPS_rf_trim_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.08_CC42973_N2INJURED_V4_2.7k_120FPS_rf_trim_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [float(x) for x in ['1.093', '7.330', '14.433', '20.814', '24.928', '30.84', '35.268', '42.592', '47.269', 
               '54.334', '58.980', '65.483', '66.179', '85.683']]
attempt_labels = ['s', 'f', 'f', 'f', 's', 'f', 'f', 's', 'f', 's', 'f', 'f', 'f', 'f']
lst.append(dlc(h5, attempt_times, attempt_labels))
injuredn2v4 = lst[-1]

#"D:\DCIM\100GOPRO\2019.07.16_CC42970_N3_V1_2.7k_120FPS_copy.MP4"
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.16_CC42970_N3_V1_2.7k_120FPS_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [27.6, 32.8, 51.97, 55.98, 64.04, 76.71]
attempt_labels = ['s', 's', 's', 's', 'f', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
n3v1_07_16 = lst[-1]

#???
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.16_CC42970_N2_V1_2.7k_120FPS_splicedDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [4.0, 9.25, 16.93, 23.85, 37.2, 37.92, 57.11, 74.06, 80.25, 101.76, 107.54, 135.48, 136, 145.53, 
              149.03, 164.23, 164.76, 165.41, 166.16, 167.91, 169.91, 173.4, 179.4, 241.1, 245.76, 278.5, 281.7, 
               282.1, 282.85, 285.65, 291.68, 296.75, 310.86, 311.4]
attempt_labels = ['s', 's', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 's', 'f', 'f', 'f', 'r', 'r',
                'r', 'r', 'r', 'f', 's', 'f', 's', 'f', 'f', 'r', 'r', 'r', 's', 'f', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
n2v1_07_16 = lst[-1]

# #???
# h5 = "../../gopro3lens-vj-2019-07-08/videos/"
# attempt_times = [31.58, 144.08, 157.66, 162.92, 176.26, 204.35, 214.1, 347.0]
# attempt_labels = ['f', 'f', 'f', 'f', 'f', 'f', 's', 's']
#lst.append(dlc(h5, attempt_times, attempt_labels))
# n2v2_07_16 = lst[-1]

#???
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.16_CC42970_N2_V3_2.7k_120FPS_splicedDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [4.13, 5.4, 6.4, 7.65, 40.65, 52.8, 65.08, 65.6, 72.23, 77.91, 78.5, 98.25, 103.91, 140.87, 190.2,
              213.1, 281.3]
attempt_labels = ['f', 'r', 'r', 's', 's', 's', 'f', 'r', 'f', 'f', 'f', 's', 'r', 's', 's', 'f', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
n2v3_07_16 = lst[-1]

#???
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.16_CC42970_N3_V2_2.7k_120FPS_splicedDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [29.33, 93.86, 99.4, 149.5, 169.16]
attempt_labels = ['s', 's', 's', 's', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
n3v2_07_16 = lst[-1]

#???
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.16_CC42970_N3_V3_2.7k_120FPS_splicedDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [18.4, 22.03, 29.86, 61.01, 63.86, 64.27, 71.59, 84.66, 94.82]
attempt_labels = ['s', 'f', 'f', 's', 'f', 'f', 'f', 's', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
n3v3_07_16 = lst[-1]

#???
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.16_CC42973_N2INJURED_V1_2.7k_120FPS_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [5.6, 23.6, 32.84, 36.14, 43.5, 50.14, 60.75, 64.7, 70.87, 87.25, 92.0, 122.9, 182.95, 246.75, 
               284.23, 290.43, 303.5, 303.92, 324.65, 332.8, 367.56]
attempt_labels = ['s', 'f', 's', 's', 's', 'f', 's', 's', 'f', 's', 'f', 'f', 's', 'f', 's', 's', 'f', 's', 'f', 
                's', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
injuredn2v1_07_16 = lst[-1]

#???
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.20_CC42970_N3_V1_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [20.593, 28.214, 29.628, 33.882, 34.304, 36.336, 42.306, 55.788, 71.247, 73.5, 80.66, 88.88, 
               91.272, 95.146, 119.457, 121.811, 122.453, 139.831, 142.811, 151.15, 179.413, 188.505, 196.2, 
               200.164, 206.380, 209.948, 214.108, 223.632, 225.012, 226.792, 234.678, 243.545, 243.886, 
               244.451, 262.539, 287.947, 308.432, 314.427, 322.211, 330.988, 349.099, 351.323, 357.912, 378.158,
               379.638, 395.209, 411.635, 438.878, 447.609, 456.933, 467.788, 468.905, 475.038, 493.397, 512.640, 
               519.380]
attempt_labels = ['f', 'f', 'r', 'r', 'f', 'r', 'r', 'f', 's', 'r', 'f', 'f', 'r', 'r', 'f', 'r', 'r', 'r', 's', 
                'f', 's', 'f', 's', 'r', 's', 'r', 's', 's', 'r', 'r', 's', 'f', 'r', 'r', 's', 'f', 's', 's', 
                's', 's', 'f', 'r', 'f', 'f', 'r', 'f', 'f', 's', 's', 's', 'f', 'f', 'f', 'f', 'f', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
n3v1_07_20 = lst[-1]

#???
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.20_CC42970_N3_V2_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [6.944, 41.941, 104.533, 124.3, 160.669, 167.118, 176.614, 231.837, 392.318]
attempt_labels = ['s', 's', 's', 'f', 's', 'f', 'f', 'f', 's']
lst.append(dlc(h5, attempt_times, attempt_labels))
n3v2_07_20 = lst[-1]


#???
h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.20_CC42973_N2INJ_V1_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
attempt_times = [10.167, 11.401, 15.1, 17.312, 35.069, 35.533, 38.013, 52.693, 62.713, 74.648, 83.714, 89.737, 
               90.178, 99.677, 100.003, 107.295, 107.672, 110.572, 116.553]
attempt_labels = ['f', 'f', 'f', 'f', 'f', 'f', 'r', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 's', 'f', 'f', 'r', 'f']
lst.append(dlc(h5, attempt_times, attempt_labels))
injuredn2v1_07_20 = lst[-1]

# #???
# h5 = "../../gopro3lens-vj-2019-07-08/videos/2019.07.20_CC42973_N2INJ_V2_copyDeepCut_resnet50_gopro3lensJul8shuffle1_120005.h5"
# attempt_times = [0.95, 7.733, 17.775, 22.483, 27.651, 35.692, 36.259, 53.425, 53.961, 55.356, 62.438, 70.604, 79.640, 
#                80.111, 83.914, 88.973, 93.728, 99.419, 99.921, 100.832, 117.864, 126.159, 126.629, 131.242, 
#                152.901, 153.312, 169.244, 169.681, 170.298, 172.057, 172.707, 176.290, 178.2, 178.651, 184.909, 207.116, 
#                207.513, 223.898, 224.791, 232.665, 241.268, 244.793, 245.431, 256.866, 270.817, 271.35, 
#                277.649, 282.654, 284.676, 286.434, 291.266]
# attempt_labels = ['s', 'f', 's', 'r', 'f', 'f', 'r', 'f', 'r', 'r', 's', 's', 'f', 'r', 'r', 's', 'r', 'f', 'f', 
#                 'r', 's', 'f', 's', 'r', 'f', 's', 'f', 'f', 'f', 'f', 'f', 'r', 'f', 'r', 'r', 'f', 'r', 'f', 
#                 'r', 's', 'f', 'r', 'r', 'r', 'f', 'r', 's', 'r', 'r', 's', 'r']
# lst.append(dlc(h5, attempt_times, attempt_labels))
# injuredn2v2_07_20 = lst[-1]

# Find Success, Fails, and Reaches

In [24]:
from statistics import mode, StatisticsError
#Helper Functions
def get_starts(lst):
    def most_common(l):
        try:
            return [mode(l)]
        except StatisticsError as e:
            # will only return the first element if no unique mode found
            if 'no unique mode' in e.args[0]:
                print("TIE: ", l)
                return [l[0]]
            # this is for "StatisticsError: no mode for empty data"
            # after calling mode([])
    lst = [(frame_to_time(x[0]), x[1]) for x in lst if x[1] != 'n']
    print(lst[0])
    lst.insert(0, (-999, 'sentinel'))
    starts = []
    classes_list = []
    for i in range(0, len(lst)):
        if lst[i][1] != 'sentinel':
            classes_list.append(lst[i][1])
        #most common
#         if lst[i][0] - lst[i-1][0] > .2:
#             starts.append((lst[i][0], most_common(classes_list), classes_list))
#             classes_list = []
#             run = 0
        #last of the run
        if lst[i][0] - lst[i-1][0] > .2:
            starts.append((lst[i][0], classes_list[-1], classes_list))
            classes_list = []
    return starts

#Find Reaches, 
def get_classified_starts(classifier, df, p):
    dataframe = df[p.cols]
    inputs = []
    num_frames = p.frames_before + p.frames_after
    for index in range(num_frames, dataframe.shape[0] - num_frames, p.step_size):
        temp_df = dataframe.iloc[index - p.frames_before: index + p.frames_after]
        inputs.append(temp_df.values.flatten())
    inputs = np.asarray(inputs)
    predictions = classifier.predict(inputs)
    start_frames = list(range(num_frames, dataframe.shape[0] - num_frames, p.step_size))
    
    class_to_predictions = {}
    return get_starts(list(zip(start_frames, predictions)))
#     for start, prediction in zip(start_frames, predictions2):
#         for c in classes:
#             if prediction == c:
#                 if c in class_to_predictions.keys():
#                     class_to_predictions[c].append(frame_to_time(start))
#                 else:
#                     class_to_predictions[c] = [frame_to_time(start)]
#     for c in classes:
#         hold = get_starts(class_to_predictions[c])
#         class_to_predictions[c] = hold
#     return class_to_predictions


In [25]:
def find_differences(predicted, solutions, epsilon, verbose=False):
    index = 0
    while index < len(predicted) and index < len(solutions):
        diff = predicted[index][0] - solutions[index][0]
        if abs(diff) < epsilon:
            index += 1
            continue
        elif diff > 0:
            predicted.insert(index, (9999, ['x'], ['x']))
        elif diff < 0:
            solutions.insert(index, (9999, 'x'))
        index += 1
    while len(predicted) < len(solutions):
        predicted.append((9999, ['x'], ['x']))
    while len(solutions) < len(predicted):
        solutions.append((9999, 'x'))
    false_positives = [predicted[i] for i in range(len(predicted)) if solutions[i][0] == 9999]
    false_negatives = [solutions[i] for i in range(len(predicted)) if predicted[i][0] == 9999]
    if verbose:
        print("false positives: ", false_positives, "\nCOUNT: ", len(false_positives))
        print("false negatives: ", false_negatives, "\nCOUNT: ", len(false_negatives))
    else:
        print("false positives: ", len(false_positives))
        print("false negatives: ", len(false_negatives))
    for i in range(len(predicted)-1, -1, -1):
        if predicted[i][0] == 9999 or solutions[i][0] == 9999:
            predicted.pop(i)
            solutions.pop(i)
    return (predicted, solutions)