# Vorbereitung

In [None]:
%env KERAS_BACKEND=theano
%env THEANO_FLAGS=floatX=float32,device=cpu

In [None]:
%load_ext autoreload
%autoreload 1
%aimport bb_behavior
%aimport bb_behavior.plot
%aimport bb_behavior.tracking
%aimport bb_behavior.tracking.pipeline

import bb_behavior
import bb_behavior.plot
import bb_behavior.tracking
import bb_behavior.tracking.pipeline

import pandas as pd
import time
import datetime

import os
import glob

# Run

In [None]:
from bb_behavior.tracking.pipeline import get_default_pipeline
# Drop the reference to any previously built pipeline before constructing a
# new one. NOTE(review): presumably this lets the old pipeline be released
# when the cell is re-run -- confirm; otherwise this assignment is redundant.
default_pipeline = None
# NOTE(review): the threshold is passed as the string "0.50"; confirm that
# get_default_pipeline expects a string rather than a float here.
default_pipeline = get_default_pipeline(localizer_threshold="0.50")

In [None]:
from tqdm import tqdm_notebook # progress bar

import math
import numpy as np
from bb_tracking.data.constants import DETKEY
#from bb_tracking.tracking import score_id_sim_v
from bb_tracking.tracking import distance_orientations_v, distance_positions_v

from bb_behavior.tracking.pipeline import detect_markers_in_video
from bb_behavior.tracking.pipeline import track_detections_dataframe
from bb_behavior.tracking.pipeline import display_tracking_results

In [None]:
import time
import os

def filename_to_datestring(filname):
    """Extract the date string from a video file name.

    `filname` may be a bare file name or a full path. The result is the
    second underscore-separated field of the part of the file name that
    precedes the first dot.
    """
    basename = os.path.basename(filname)
    stem = basename.partition('.')[0]
    fields = stem.split('_')
    return fields[1]


def string_to_timestamp(datestring):
    """Convert a date string to a Unix timestamp.

    params
        datestring: format 2018-08-19-01-08-13 (interpreted as local time)
    output
        unix timestamp (float)
    """
    parsed = time.strptime(datestring, "%Y-%m-%d-%H-%M-%S")
    return time.mktime(parsed)

def timestamp_to_string(timestamp):
    """Format a Unix timestamp as a local-time string like 2018-08-19-01-08-13."""
    local_time = time.localtime(timestamp)
    return time.strftime("%Y-%m-%d-%H-%M-%S", local_time)

In [None]:
# Central settings read by the detection, tracking and classification cells.
config = {
    "tag_pixel_diameter": 50,
    "n_frames": None,
    "confidence_filter_detections": 0.08,
    "confidence_filter_tracks": 0.20,
    "coordinate_scale": 1.0,
    "start_time": None,
    "fps": 10.0,
    "cam_id": 0,
    # Fraction of the frame width measured from the edge,
    # e.g. 1000 px and 0.15 -> 0-150 px
    "left_leaving_area": 0.3,
    "right_leaving_area": 0.3,
    "px_x_resolution_vid": 1944,
    "videos_dir": "../videos/",
}


In [None]:
# Collect the paths of all .h264 videos in the configured directory so that
# the next cell can iterate over them with tqdm.
import os
base_directory = config["videos_dir"]
# os.path.join works whether or not videos_dir ends with a path separator.
# os.listdir returns entries in arbitrary order, so sort to make the
# processing order reproducible.
paths = sorted(
    os.path.join(base_directory, name)
    for name in os.listdir(base_directory)
    if name.endswith(".h264")
)

In [None]:
# This is where the actual tracking happens and the results are stored.
# video_data maps each video path to the tuple (frame_info, detections, tracks).
num_processed_videos = 0
video_data = dict()
frame_info = None
detections = None
# Videos that produced no usable result; they are removed from paths below.
bad_paths = []

for path in tqdm_notebook(paths):
    start_time = config["start_time"]
    cam_id = config["cam_id"]
    num_processed_videos += 1
    try:
        frame_info, detections = detect_markers_in_video(
            path,
            decoder_pipeline=default_pipeline,
            tag_pixel_diameter=config["tag_pixel_diameter"],
            n_frames=config["n_frames"],
            fps=config["fps"],
            progress="tqdm_notebook",
        )
        # Without any sufficiently confident detection no tracks would be
        # found and tracking would fail. Such a video gets no video_data
        # entry, so it must also be dropped from paths.
        is_confident = detections['confidence'] >= config["confidence_filter_detections"]
        if not is_confident.any():
            bad_paths.append(path)
            continue
        tracks = track_detections_dataframe(
            detections,
            tracker="tracker.det_score_fun.frag_score_fun.dill",
            confidence_filter_detections=config["confidence_filter_detections"],
            confidence_filter_tracks=config["confidence_filter_tracks"],
            coordinate_scale=config["coordinate_scale"],
        )
        tracks['video'] = filename_to_datestring(path)
        video_data[path] = (frame_info, detections, tracks)
    except ValueError:
        # Raised when the video is empty: skip this video.
        bad_paths.append(path)
    except Exception as err:
        print(err)
        raise
    # only first vid: break

# Later cells look up video_data[path] for every entry of paths, so every
# path without an entry has to be removed here.
skipped = set(bad_paths)
paths = [path for path in paths if path not in skipped]

In [None]:
# video_data[path] is the tuple (frame_info, detections, tracks); unpack it
# straight into the display call for every processed video.
for path in paths:
    display_tracking_results(path, *video_data[path])

# Modify data to default for all Algorithms / Save detection to .csv

In [None]:
# Stack the per-video track tables (third element of each video_data entry)
# into one frame, then drop the columns that are not needed downstream.
tracks = pd.concat([video_data[path][2] for path in paths], ignore_index=True)
tracks = tracks.drop(columns=["localizerSaliency", "beeID", "camID", "frameIdx"])

In [None]:
# Pass the path instead of a text handle: a text-mode handle opened without
# newline='' lets universal-newline translation interfere with the line
# endings pandas writes. With a path pandas opens the file itself.
tracks.to_csv("tracks.csv")

In [None]:
tracks

In [None]:
# RESULT DICT, can be turned into a DataFrame with pd.DataFrame(data=results)
# "bee_id":[int], "time_in":[String], "os_in":[Float], "time_out":[String],
# "os_out":[Float], "in_direction":[{"left", "right"}], "out_direction":[{"left", "right"}]
results = {
    column: []
    for column in (
        "bee_id",
        "time_in",
        "os_in",
        "time_out",
        "os_out",
        "in_direction",
        "out_direction",
    )
}

# Hilfsfunktionen

In [None]:
# get all videos between timestamp_in and timestamp_out
def get_videos_between(timestamp_in, timestamp_out):
    """Return the date strings of all videos between two date strings.

    params
        timestamp_in:  lower bound (inclusive), date string as returned by
                       filename_to_datestring, e.g. 2018-08-19-01-08-13
        timestamp_out: upper bound (inclusive), same format
    output
        list of date strings in ascending order, one per *.h264 file in
        config["videos_dir"] whose date string lies within the bounds

    The bounds are compared as strings.
    """
    pattern = os.path.join(config["videos_dir"], '*.h264')
    # Sort explicitly: glob gives no ordering guarantee and callers take the
    # last element as the latest video.
    datestrings = sorted(filename_to_datestring(path) for path in glob.glob(pattern))
    return [d for d in datestrings if timestamp_in <= d <= timestamp_out]

## Zwischenschritt: Merge all close Tracks of one bee

In [None]:
def gather_tracks(tracks):
    """Aggregate per-detection rows into one row per (bee, track, video).

    params
        tracks: DataFrame with the columns bee_id, track_id, video, xpos,
                ypos, timestamp and zrotation
    output
        DataFrame with the columns, in this order:
        bee_id, xpos, ypos, timestamps, zrotation, video_start_time,
        track_start_time, track_end_time, video_end_time
        xpos, ypos, timestamps and zrotation hold one list per row;
        video_start_time and video_end_time are date strings;
        track_start_time and track_end_time are unix timestamps.
    """
    # Select the value columns with a list: tuple-style selection on a
    # groupby is not supported by current pandas versions.
    value_columns = ['xpos', 'ypos', 'timestamp', 'zrotation']
    grouped = tracks.groupby(['bee_id', 'track_id', 'video'], as_index=False)[value_columns]
    tracks_ml = grouped.aggregate(lambda values: list(values))

    # we don't need track_id anymore
    tracks_ml = tracks_ml.drop(columns='track_id')

    # add a column: convert video name to timestamp
    tracks_ml['video_start_time'] = tracks_ml['video'].apply(string_to_timestamp)

    # the video name is now redundant
    tracks_ml = tracks_ml.drop(columns='video')

    # because aggregated: now multiple timestamps per row
    tracks_ml = tracks_ml.rename(columns={'timestamp': 'timestamps'})

    # start/end time of a track = timestamp of the video (date) plus the
    # first/last timestamp of the track (seconds since start of video)
    first_offset = tracks_ml['timestamps'].apply(lambda stamps: stamps[0])
    last_offset = tracks_ml['timestamps'].apply(lambda stamps: stamps[-1])
    tracks_ml['track_start_time'] = tracks_ml['video_start_time'] + first_offset
    tracks_ml['track_end_time'] = tracks_ml['video_start_time'] + last_offset

    # convert back to string
    tracks_ml['video_start_time'] = tracks_ml['video_start_time'].apply(timestamp_to_string)

    # video_end_time is the latest video that starts no later than the end of
    # the track. sorted() makes "latest" independent of the order in which
    # get_videos_between returns its results.
    track_end_strings = tracks_ml['track_end_time'].apply(timestamp_to_string)
    tracks_ml['video_end_time'] = [
        sorted(get_videos_between(video_start, track_end))[-1]
        for video_start, track_end in zip(tracks_ml['video_start_time'], track_end_strings)
    ]

    return tracks_ml

def merge_tracks(tracks_ml, verbose = False, max_gap_seconds = 10):
    """
    in:
        tracks_ml: output from gather_tracks(tracks)
        verbose: print the time gap of every pair of tracks that is merged
        max_gap_seconds: two consecutive tracks of the same bee are merged
            when the next one starts less than this many seconds after the
            previous one ends
    out:
        same df as in (sorted by bee_id, track_start_time), with merged rows;
        the input frame is not modified

    merge tracks of same bee where start and end timestamps are close together
    assume there can not be overlapping tracks

    A merged row keeps bee_id, video_start_time and track_start_time of the
    earlier track and takes track_end_time and video_end_time from the later
    one. The list columns are concatenated; the timestamps of the later track
    are shifted so that they are relative to the earlier track's video.
    """

    # 1. sort: bee_id, start_time
    ordered = tracks_ml.sort_values(['bee_id', 'track_start_time'])
    if ordered.empty:
        return ordered

    # per-detection list columns that are simply concatenated on merge
    list_columns = [c for c in ('xpos', 'ypos', 'zrotation') if c in ordered.columns]

    labels = []   # index labels of the rows that are kept
    merged = []   # one dict per kept row, merged in place
    for label, record in zip(ordered.index, ordered.to_dict('records')):
        previous = merged[-1] if merged else None
        if previous is not None and record['bee_id'] == previous['bee_id']:
            gap = record['track_start_time'] - previous['track_end_time']
        else:
            gap = None

        if gap is not None and gap < max_gap_seconds:
            if verbose:
                print(gap)

            # shift the timestamps of the later track onto the time base of
            # the earlier track's video
            offset = (string_to_timestamp(record['video_start_time'])
                      - string_to_timestamp(previous['video_start_time']))
            shifted = [stamp + offset for stamp in record['timestamps']]

            # build new lists so the lists of the input frame stay untouched
            for column in list_columns:
                previous[column] = previous[column] + record[column]
            previous['timestamps'] = previous['timestamps'] + shifted

            # the merged track ends where the later track ends
            previous['track_end_time'] = record['track_end_time']
            if 'video_end_time' in record:
                previous['video_end_time'] = record['video_end_time']
        else:
            labels.append(label)
            merged.append(record)

    index = pd.Index(labels, name=ordered.index.name)
    return pd.DataFrame(merged, index=index, columns=ordered.columns)

In [None]:
# Aggregate the detections into tracks, merge close tracks of the same bee,
# then order the result by the start time of the video.
tracks_ml = merge_tracks(gather_tracks(tracks)).sort_values(['video_start_time'])

In [None]:
tracks_ml

# Algorithmus 1: Baseline - Areas as Decider

In [None]:
from math import pi
""" Vorgehen:
Für jedes Bienen-Track Paar
- Prüfe, wo sich die Biene beim ersten erkannten Erscheinen aufhält
    - Ordne die Position in left, right oder middle ein
- Prüfe, wo sich die Biene beim letzten erkannten Erscheinen aufhält
    - Ordne die Position in left, right oder middle ein
Wenn sich Biene in der Mitte befindet, starte neue Routine, die links oder rechts zuordnet
Daraus kann nun abgeleitet werden, wo die Biene reingekommen ist und wo sie rausgegangen ist.
"""
# Reads the module-level tracks_ml (see above) and refills the module-level
# results dict.
def baseline_alg_classify_bee():
    """Classify the entry and exit side of every track by image area.

    For each row of tracks_ml the first and the last detection are assigned
    to "left" or "right":
      - xpos within the left_leaving_area fraction of the frame  -> "left"
      - xpos within the right_leaving_area fraction of the frame -> "right"
      - otherwise (middle) the side is derived from zrotation, the direction
        the bee is looking in (radians)

    The lists in results are emptied first, so results holds exactly one
    entry per row of tracks_ml afterwards, even when another algorithm ran
    before or the cell is executed again.
    """
    frame_width = config["px_x_resolution_vid"]
    left_limit = config["left_leaving_area"] * frame_width
    right_limit = frame_width - config["right_leaving_area"] * frame_width

    def get_direction(xpos, zrotation):
        if xpos <= left_limit:
            return "left"
        if xpos >= right_limit:
            return "right"
        # Bee is in the middle: use the direction it is looking in.
        # The magnitude of the angle is compared, so negative angles are
        # classified like their positive counterparts.
        # NOTE(review): assumes zrotation lies in [-pi, pi] -- confirm.
        return "left" if abs(zrotation) > pi / 2 else "right"

    for values in results.values():
        values.clear()

    for row in tracks_ml.itertuples(index=False):
        results["bee_id"].append(row.bee_id)
        results["time_in"].append(row.video_start_time)
        # offset of the track start within its first video
        results["os_in"].append(row.track_start_time - string_to_timestamp(row.video_start_time))
        results["time_out"].append(row.video_end_time)
        # offset of the track end within its last video
        results["os_out"].append(row.track_end_time - string_to_timestamp(row.video_end_time))
        results["in_direction"].append(get_direction(row.xpos[0], row.zrotation[0]))
        results["out_direction"].append(get_direction(row.xpos[-1], row.zrotation[-1]))
    

In [None]:
# Run the area-based baseline and display its results
baseline_alg_classify_bee()
ergebnis = pd.DataFrame(results)
ergebnis

# Algorithmus 2: Baseline - zpos as decider

In [None]:
from math import pi
""" Vorgehen:
Für jedes Bienen-Track Paar
- Prüfe, wo sich die Biene beim ersten erkannten Erscheinen aufhält
    - Ordne die Position in left, right oder middle ein
- Prüfe, wo sich die Biene beim letzten erkannten Erscheinen aufhält
    - Ordne die Position in left, right oder middle ein
Wenn sich Biene in der Mitte befindet, starte neue Routine, die links oder rechts zuordnet
Daraus kann nun abgeleitet werden, wo die Biene reingekommen ist und wo sie rausgegangen ist.
"""
# Reads the module-level tracks_ml (see above) and refills the module-level
# results dict.
def baseline_2_alg_classify_bee():
    """Classify the entry and exit side of every track by zrotation only.

    zrotation is the direction the bee is looking in (radians). For the first
    detection a bee looking left is classified as entering from the "right",
    otherwise from the "left". For the last detection a bee looking left is
    classified as leaving to the "left", otherwise to the "right".

    The lists in results are emptied first, so results holds exactly one
    entry per row of tracks_ml afterwards, even when another algorithm ran
    before or the cell is executed again.
    """
    def looks_left(zrotation):
        # The magnitude of the angle is compared, so negative angles are
        # classified like their positive counterparts.
        # NOTE(review): assumes zrotation lies in [-pi, pi] -- confirm.
        return abs(zrotation) > pi / 2

    def get_in_direction(zpos):
        return "right" if looks_left(zpos) else "left"

    def get_out_direction(zpos):
        return "left" if looks_left(zpos) else "right"

    for values in results.values():
        values.clear()

    for row in tracks_ml.itertuples(index=False):
        results["bee_id"].append(row.bee_id)
        results["time_in"].append(row.video_start_time)
        # offset of the track start within its first video
        results["os_in"].append(row.track_start_time - string_to_timestamp(row.video_start_time))
        results["time_out"].append(row.video_end_time)
        # offset of the track end within its last video
        results["os_out"].append(row.track_end_time - string_to_timestamp(row.video_end_time))
        results["in_direction"].append(get_in_direction(row.zrotation[0]))
        results["out_direction"].append(get_out_direction(row.zrotation[-1]))
    

In [None]:
# Run the rotation-based baseline and display its results
baseline_2_alg_classify_bee()
ergebnis = pd.DataFrame(results)
ergebnis

# Postprocessing

## Get all videos from test set

In [None]:
# read in the test data csv
test_data = pd.read_csv('bees_test.csv')

# convert the full filenames to date strings
for column in ('video', 'timestamp_in', 'timestamp_out'):
    test_data[column] = test_data[column].apply(filename_to_datestring)

# sort by timestamp_in; sort_values returns a new frame, so the result has to
# be assigned for the sort to take effect
test_data = test_data.sort_values(['timestamp_in'])

# go through test_data and collect all videos between timestamp_in and
# timestamp_out of every row; a set removes the duplicates
all_videos = set()
for time_in, time_out in zip(test_data['timestamp_in'], test_data['timestamp_out']):
    all_videos.update(get_videos_between(time_in, time_out))

# sorted list so the order is the same on every run
all_videos = sorted(all_videos)
len(all_videos)

# Vergleichsfunktion Algorithmus mit Labels