In [None]:
from PIL import Image
import tensorflow as tf
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
from PIL import Image
import numpy as np
import os
# import cv2
import pickle
from tqdm import tqdm

In [None]:
# Feature extractor: ImageNet-pretrained InceptionResNetV2 without its
# classification head (include_top=False); pooling='avg' makes the backbone emit
# one average-pooled feature vector per input image.
_FRAME_SHAPE = (224, 224, 3)  # height, width, channels (channels-last)

_backbone = tf.keras.applications.InceptionResNetV2(
    weights="imagenet",
    include_top=False,
    pooling='avg',
    input_shape=_FRAME_SHAPE,
    classes=1000,
)

# The input layer is declared with a fixed batch size of 1, so the wrapped model
# is built for one frame per call.
inputs = tf.keras.Input(shape=_FRAME_SHAPE, batch_size=1, name="digits")
outputs = _backbone(inputs)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Pass-through layer, intended to "block" (replace) a network's fc layer.
class Identity(nn.Module):
    """No-op module whose output is exactly its input.

    NOTE(review): nothing in the visible code instantiates this class; it looks
    like a leftover from a torchvision-based extractor -- confirm before removing.
    """

    def forward(self, x):
        """Return ``x`` untouched (the very same object, no copy)."""
        return x

# Image transforms applied to every frame before it is fed to `model`.
#
# Resize(256) + CenterCrop(224) yields a 224x224 crop and ToTensor() converts it
# to a float CHW tensor in [0, 1].
#
# Normalisation: the frames are consumed by Keras' InceptionResNetV2, whose
# ImageNet weights expect pixels scaled to [-1, 1] (what
# tf.keras.applications.inception_resnet_v2.preprocess_input does). The previous
# torchvision ImageNet mean/std values belong to torchvision models and gave the
# network a different input distribution. (x - 0.5) / 0.5 maps [0, 1] -> [-1, 1].
# NOTE: this changes the extracted feature values; previously saved feature
# files must be regenerated to stay consistent.
transf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


def _frame_sort_key(file_name):
    """Natural-order sort key, so that 'frame_2.jpg' sorts before 'frame_10.jpg'."""
    import re  # local import: keeps this block self-contained

    # re.split with a capturing group alternates text / digit-run / text / ...,
    # so every odd index is a run of digits and can be compared numerically.
    parts = re.split(r'(\d+)', file_name)
    natural = [int(part) if index % 2 else part.lower()
               for index, part in enumerate(parts)]
    # The raw name breaks ties (e.g. 'A.jpg' vs 'a.jpg') deterministically.
    return natural, file_name


def get_frames(frames_folder_path):
    """Load and preprocess every frame image stored in a folder.

    Files are processed in natural (numeric-aware) name order so that row ``i``
    of the result is deterministic and follows the frame numbering, instead of
    the arbitrary order returned by ``os.listdir``.

    Args:
        frames_folder_path: Directory containing the frame image files of one
            video. Every entry in it is opened as an image.

    Returns:
        A float32 ``numpy`` array of shape ``(num_files, *frame.shape)`` holding
        the output of ``transf`` for each file (channels-first, as produced by
        ``ToTensor``), or ``None`` when the folder contains no files.
    """
    frame_file_names = sorted(os.listdir(frames_folder_path), key=_frame_sort_key)

    frames = None
    for i, frame_file_name in enumerate(frame_file_names):
        frame_path = os.path.join(frames_folder_path, frame_file_name)
        # The context manager closes the underlying file; convert('RGB') makes
        # grayscale / RGBA / palette images compatible with the 3-channel
        # normalisation in `transf`.
        with Image.open(frame_path) as image:
            frame = transf(image.convert('RGB'))

        if frames is None:
            # Allocate once, sized from the first preprocessed frame. float32
            # matches the tensor dtype and halves memory versus numpy's default.
            frames = np.empty((len(frame_file_names), *frame.size()), dtype=np.float32)
        frames[i] = frame.numpy()

    return frames


def frames_features(frames_folder_path):
    """Extract one feature vector per frame of a video with the global ``model``.

    Args:
        frames_folder_path: Directory containing the frame images of one video;
            passed straight to ``get_frames``.

    Returns:
        A ``numpy`` array with the model outputs of all frames concatenated
        along axis 0 (one row per frame, in the order returned by
        ``get_frames``), or ``None`` when the folder contains no frames.
    """
    frames = get_frames(frames_folder_path)
    # get_frames returns None for a folder without files; calling len() on it
    # used to raise a TypeError.
    if frames is None or len(frames) == 0:
        return None

    # The model's Input layer is declared with batch_size=1, so frames are fed
    # one at a time.
    batch_size = 1
    batch_outputs = []
    for start_index in range(0, len(frames), batch_size):
        # Slicing clamps at the end of the array, so no explicit min() is needed.
        frame_batch = frames[start_index:start_index + batch_size]
        # Channels-first (N, C, H, W) -> channels-last (N, H, W, C) for Keras.
        frame_batch = np.moveaxis(frame_batch, 1, 3)
        # training=False makes inference mode explicit.
        avg_pool_value = model(frame_batch, training=False)
        batch_outputs.append(np.array(avg_pool_value))

    # Concatenate once at the end rather than re-copying the growing result on
    # every iteration.
    return np.concatenate(batch_outputs)

# Module-level accumulator shared with videos_features(): it maps each frames
# folder name to {'resnet152': <feature array>} and is what gets pickled.
# The visible code never clears it, so repeated calls keep adding to it.
features = {}

def videos_features(frames_folders_path, videos_path, save_path):
    """Extract features for every video's frames folder and pickle the result.

    Each sub-directory of ``frames_folders_path`` is treated as the frames of
    one video. Its features are stored in the module-level ``features`` dict as
    ``features[folder_name] = {'resnet152': array}`` and the whole dict is
    written to ``save_path`` with ``pickle``.

    Args:
        frames_folders_path: Directory holding one sub-directory of frame
            images per video.
        videos_path: Directory of the original ``.mp4`` videos. Currently
            unused: it was only needed by the disabled cv2-based FPS lookup and
            is kept so existing callers keep working.
        save_path: Destination file for the pickled ``features`` dict.

    Returns:
        None. Results are available in the global ``features`` dict and on disk.
    """
    # Sorted for a deterministic processing (and dict insertion) order.
    frames_folders = sorted(os.listdir(frames_folders_path))
    for frames_folder in tqdm(frames_folders, ncols=100, ascii=True):
        frames_folder_path = os.path.join(frames_folders_path, frames_folder)
        # Skip stray files (anything that is not a frames directory); listing
        # them as a folder would raise.
        if not os.path.isdir(frames_folder_path):
            continue

        feat = frames_features(frames_folder_path)
        # The key name 'resnet152' is kept as-is because consumers of the pickle
        # read it, even though the visible model is InceptionResNetV2.
        # (FPS extraction via cv2 is disabled, so no 'fps' entry is stored.)
        features[frames_folder] = {'resnet152': feat}

    # Context manager guarantees the file is flushed and closed, even on error.
    with open(save_path, 'wb') as f:
        pickle.dump(features, f)


def main():
    """Run the feature extraction with the hard-coded input and output paths.

    The paths point below ``/content/drive/MyDrive`` (presumably a Colab Google
    Drive mount -- adjust them when running elsewhere).
    """
    videos_features(
        frames_folders_path="/content/drive/MyDrive/Frames",
        videos_path="/content/drive/MyDrive/mmsd_raw_data/utterances_final",
        save_path='/content/drive/MyDrive/inception_video_features_2.pkl',
    )


# Entry point: run the full extraction only when this code is executed directly
# (i.e. its module name is '__main__'), not when it is imported.
if __name__ == '__main__':
	main()