From 5789daf21245931b8ec63b4cb1626f87f63adea8 Mon Sep 17 00:00:00 2001
From: changan
Date: Thu, 6 Apr 2017 19:27:13 -0700
Subject: [PATCH] add interface

---
 models/finetuned_resnet.py |  2 +-
 predict.py                 | 22 ++++++++++------------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/models/finetuned_resnet.py b/models/finetuned_resnet.py
index e61ddd8..74a38b2 100644
--- a/models/finetuned_resnet.py
+++ b/models/finetuned_resnet.py
@@ -1,7 +1,7 @@
 import os
 from keras.models import Model
 from keras.layers import Dense, Flatten, Dropout
-from models.resnet50 import ResNet50
+from .resnet50 import ResNet50
 
 N_CLASSES = 101
 IMSIZE = (216, 216, 3)
diff --git a/predict.py b/predict.py
index 3fd2c4e..99d0646 100644
--- a/predict.py
+++ b/predict.py
@@ -1,8 +1,8 @@
 import os
 import numpy as np
-from utils.UCF_utils import two_stream3_generator, two_stream18_generator
-from models.two_stream import two_stream_model
-from utils.OF_utils import stack_optical_flow
+from .utils.UCF_utils import two_stream3_generator, two_stream18_generator
+from .models.two_stream import two_stream_model
+from .utils.OF_utils import stack_optical_flow
 import cv2
 import random
 from scipy.misc import imresize
@@ -67,10 +67,7 @@ def predict_two_stream18_test():
     print('test accuracy on', steps, 'examples is', float(correct_num) / steps)
 
 
-def predict_single_video(video_path):
-    spatial_weights = '/Users/cjc/cv/ActionRecognition/data/finetuned_resnet_RGB_65.h5'
-    temporal_weights = '/Users/cjc/cv/ActionRecognition/data/temporal_cnn_42.h5'
-    model = two_stream_model(spatial_weights, temporal_weights)
+def predict_single_video(model, video_path, top_num):
     cap = cv2.VideoCapture(video_path)
     video = list()
     while cap.isOpened():
@@ -93,8 +90,7 @@ def predict_single_video(video_path):
     two_stream_input = [single_frame, of_input]
 
     preds = model.predict(two_stream_input)
-    top_3_types = decode_prediction(preds, top=3)
-    print('top-3: ', top_3_types)
+    return decode_prediction(preds, top=top_num)
 
 
 def _pick_frames(video_sequence, num_frame):
@@ -112,14 +108,16 @@ def _pick_frames(video_sequence, num_frame):
 
 
 def decode_prediction(preds, top=3):
-    index_dir = '/Users/cjc/cv/ActionRecognition/data/ucfTrainTestlist/classInd.txt'
+    index_dir = '/home/changan/ActionRecognition/data/ucfTrainTestlist/classInd.txt'
     class_dict = dict()
     with open(index_dir) as fo:
         for line in fo:
             class_index, class_name = line.split()
             class_dict[int(class_index)-1] = class_name
     top = np.argsort(preds)[0][-top:][::-1]
-    return [class_dict[x] for x in top]
+    print(preds)
+    print(top)
+    return [(class_dict[x], preds[0][x]) for x in top]
 
 
 def preprocess_single_frame(frame):
@@ -138,4 +136,4 @@ def preprocess_single_frame(frame):
     # predict_two_stream18_test()
 
     # predict single video
-    predict_single_video(video_path='/Users/cjc/cv/ActionRecognition/data/v_BabyCrawling_g01_c01.mp4')
+    # predict_single_video(video_path='/Users/cjc/cv/ActionRecognition/data/v_BabyCrawling_g01_c01.mp4')
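
Note: after this patch, predict_single_video(model, video_path, top_num) no longer loads
weight files or prints the result itself; the caller builds the two-stream model once and
receives a list of (class_name, probability) tuples, best match first. A minimal usage
sketch follows, assuming the repository root is importable as a package; the package name
and the weight/video paths below are placeholders, not part of this patch:

    # hypothetical caller -- adjust the package name and paths to your setup
    from ActionRecognition.models.two_stream import two_stream_model
    from ActionRecognition.predict import predict_single_video

    spatial_weights = '/path/to/finetuned_resnet_RGB_65.h5'   # placeholder path
    temporal_weights = '/path/to/temporal_cnn_42.h5'          # placeholder path

    # build the spatial+temporal two-stream model once, then reuse it per video
    model = two_stream_model(spatial_weights, temporal_weights)

    top_3 = predict_single_video(model, '/path/to/some_video.mp4', top_num=3)
    for class_name, prob in top_3:
        print(class_name, prob)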