From 5789daf21245931b8ec63b4cb1626f87f63adea8 Mon Sep 17 00:00:00 2001
From: changan
Date: Thu, 6 Apr 2017 19:27:13 -0700
Subject: [PATCH] add interface

---
 models/finetuned_resnet.py |  2 +-
 predict.py                 | 22 ++++++++++------------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/models/finetuned_resnet.py b/models/finetuned_resnet.py
index e61ddd8..74a38b2 100644
--- a/models/finetuned_resnet.py
+++ b/models/finetuned_resnet.py
@@ -1,7 +1,7 @@
 import os
 from keras.models import Model
 from keras.layers import Dense, Flatten, Dropout
-from models.resnet50 import ResNet50
+from .resnet50 import ResNet50
 
 N_CLASSES = 101
 IMSIZE = (216, 216, 3)
diff --git a/predict.py b/predict.py
index 3fd2c4e..99d0646 100644
--- a/predict.py
+++ b/predict.py
@@ -1,8 +1,8 @@
 import os
 import numpy as np
-from utils.UCF_utils import two_stream3_generator, two_stream18_generator
-from models.two_stream import two_stream_model
-from utils.OF_utils import stack_optical_flow
+from .utils.UCF_utils import two_stream3_generator, two_stream18_generator
+from .models.two_stream import two_stream_model
+from .utils.OF_utils import stack_optical_flow
 import cv2
 import random
 from scipy.misc import imresize
@@ -67,10 +67,7 @@ def predict_two_stream18_test():
     print('test accuracy on', steps, 'examples is', float(correct_num) / steps)
 
 
-def predict_single_video(video_path):
-    spatial_weights = '/Users/cjc/cv/ActionRecognition/data/finetuned_resnet_RGB_65.h5'
-    temporal_weights = '/Users/cjc/cv/ActionRecognition/data/temporal_cnn_42.h5'
-    model = two_stream_model(spatial_weights, temporal_weights)
+def predict_single_video(model, video_path, top_num):
     cap = cv2.VideoCapture(video_path)
     video = list()
     while cap.isOpened():
@@ -93,8 +90,7 @@ def predict_single_video(video_path):
     two_stream_input = [single_frame, of_input]
 
     preds = model.predict(two_stream_input)
-    top_3_types = decode_prediction(preds, top=3)
-    print('top-3: ', top_3_types)
+    return decode_prediction(preds, top=top_num)
 
 
 def _pick_frames(video_sequence, num_frame):
@@ -112,14 +108,16 @@ def _pick_frames(video_sequence, num_frame):
 
 
 def decode_prediction(preds, top=3):
-    index_dir = '/Users/cjc/cv/ActionRecognition/data/ucfTrainTestlist/classInd.txt'
+    index_dir = '/home/changan/ActionRecognition/data/ucfTrainTestlist/classInd.txt'
     class_dict = dict()
     with open(index_dir) as fo:
         for line in fo:
             class_index, class_name = line.split()
             class_dict[int(class_index)-1] = class_name
     top = np.argsort(preds)[0][-top:][::-1]
-    return [class_dict[x] for x in top]
+    print(preds)
+    print(top)
+    return [(class_dict[x], preds[0][x]) for x in top]
 
 
 def preprocess_single_frame(frame):
@@ -138,4 +136,4 @@ def preprocess_single_frame(frame):
     # predict_two_stream18_test()
 
     # predict single video
-    predict_single_video(video_path='/Users/cjc/cv/ActionRecognition/data/v_BabyCrawling_g01_c01.mp4')
+    # predict_single_video(video_path='/Users/cjc/cv/ActionRecognition/data/v_BabyCrawling_g01_c01.mp4')
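
Note: after this patch, predict_single_video(model, video_path, top_num) no longer loads
weight files or prints the result itself; the caller builds the two-stream model once and
receives a list of (class_name, probability) tuples, best match first. A minimal usage
sketch follows, assuming the repository root is importable as a package; the package name
and the weight/video paths below are placeholders, not part of this patch:

    # hypothetical caller -- adjust the package name and paths to your setup
    from ActionRecognition.models.two_stream import two_stream_model
    from ActionRecognition.predict import predict_single_video

    spatial_weights = '/path/to/finetuned_resnet_RGB_65.h5'   # placeholder path
    temporal_weights = '/path/to/temporal_cnn_42.h5'          # placeholder path

    # build the spatial+temporal two-stream model once, then reuse it per video
    model = two_stream_model(spatial_weights, temporal_weights)

    top_3 = predict_single_video(model, '/path/to/some_video.mp4', top_num=3)
    for class_name, prob in top_3:
        print(class_name, prob)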