In [3]:
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import math
import os
from IPython import display


In [11]:
# Load the model from TF-Hub once; `hub_model` is reused by later cells
# instead of being reloaded for every embedding call.
hub_handle = 'https://tfhub.dev/deepmind/mil-nce/s3d/1'
hub_model = hub.load(hub_handle)

def generate_embeddings(model, input_frames, input_words):
  """Compute video and text embeddings with the given model.

  Args:
    model: Object exposing callable `signatures['video']` and
      `signatures['text']` entries.
    input_frames: Video frames; they are cast to float32 before being fed to
      the video signature. Per the original author's note, the frames must
      already be normalized to the [0, 1] range and shaped
      Batch x T x H x W x 3.
    input_words: Text queries fed to the text signature.

  Returns:
    A `(video_embedding, text_embedding)` pair read from the
    `'video_embedding'` and `'text_embedding'` entries of the two outputs.
  """
  # Video branch: cast to float32, wrap as a constant, run the signature.
  frames_f32 = tf.cast(input_frames, dtype=tf.float32)
  video_signature = model.signatures['video']
  vision_output = video_signature(tf.constant(frames_f32))

  # Text branch: the queries are passed through as a constant tensor.
  text_signature = model.signatures['text']
  text_output = text_signature(tf.constant(input_words))

  video_embedding = vision_output['video_embedding']
  text_embedding = text_output['text_embedding']
  return video_embedding, text_embedding

In [12]:
# @title Video yükleme ve görselleştirme fonksiyonlarını tanımlayın { display-mode: "form" }

# CV2 kullanarak video dosyalarını açmak için yardımcı programlar.
def crop_center_square(frame):
  """Return the centered square crop of `frame`.

  The square's side equals the smaller of the first two dimensions of
  `frame`; any further dimensions (e.g. color channels) are kept as-is.

  Args:
    frame: Array whose first two axes are (height, width).

  Returns:
    The slice `frame[top:top+side, left:left+side]`.
  """
  height, width = frame.shape[:2]
  side = min(height, width)
  # Offsets are derived from the floor-halved sizes, so the crop origin for
  # odd dimensions follows `dim // 2 - side // 2` exactly.
  top = (height // 2) - (side // 2)
  left = (width // 2) - (side // 2)
  return frame[top:top + side, left:left + side]


def load_video(video_url, max_frames=32, resize=(224, 224)):
  """Download a video and return its first frames scaled into [0, 1].

  The file is fetched (and cached) with `tf.keras.utils.get_file`, decoded
  with OpenCV, and each frame is center-cropped to a square, resized, and
  has its channel order reversed (OpenCV decodes to BGR, so this yields RGB).

  Args:
    video_url: URL of the video/GIF. The last 128 characters of its basename
      are used as the cache file name.
    max_frames: Number of frames to return. Decoding stops once this many
      frames have been read.
    resize: Target size passed to `cv2.resize` for every frame.

  Returns:
    NumPy array of at most `max_frames` frames, divided by 255.0. If the
    video has fewer frames, each frame is repeated consecutively
    (`ndarray.repeat` semantics, i.e. f0, f0, f1, f1, ... rather than
    looping the clip) and the result is truncated to `max_frames`.

  Raises:
    ValueError: If frames are needed but none could be decoded (for example
      the file is not a readable video). Previously this surfaced as an
      opaque ZeroDivisionError.
  """
  path = tf.keras.utils.get_file(os.path.basename(video_url)[-128:], video_url)
  cap = cv2.VideoCapture(path)
  frames = []
  try:
    while True:
      ret, frame = cap.read()
      if not ret:
        # End of stream, or the capture could not be read at all.
        break
      frame = crop_center_square(frame)
      frame = cv2.resize(frame, resize)
      frame = frame[:, :, [2, 1, 0]]  # Reverse channel order (BGR -> RGB).
      frames.append(frame)

      if len(frames) == max_frames:
        break
  finally:
    # Always release the capture handle, even if decoding raised.
    cap.release()

  n_decoded = len(frames)
  frames = np.array(frames)
  if n_decoded < max_frames:
    if n_decoded == 0:
      # Nothing to pad from; fail with an actionable message instead of
      # dividing by zero below.
      raise ValueError(
          'No frames could be decoded from {!r} (cached at {!r}).'.format(
              video_url, path))
    # Pad short clips by repeating every frame enough times to reach
    # max_frames; the surplus is trimmed right after.
    n_repeat = int(math.ceil(max_frames / float(n_decoded)))
    frames = frames.repeat(n_repeat, axis=0)
  frames = frames[:max_frames]
  return frames / 255.0

def display_video(urls):
    """Show the given video/GIF URLs side by side in a one-row HTML table.

    The header row is generated from the number of URLs ("Video 1",
    "Video 2", ...), so every image column gets a matching header. For
    exactly three URLs the markup is identical to the previous hard-coded
    three-column header.

    Args:
        urls: Iterable of image/video URLs to embed as `<img>` tags.

    Returns:
        An `IPython.display.HTML` object wrapping the table markup.
    """
    urls = list(urls)  # Allow any iterable; we need its length and two passes.
    header_cells = ''.join(
        '<th>Video {}</th>'.format(position)
        for position in range(1, len(urls) + 1))
    image_cells = ''.join(
        '<td><img src="{}" height="224"></td>'.format(url) for url in urls)
    html = '<table><tr>{}</tr><tr>{}</tr></table>'.format(
        header_cells, image_cells)
    return display.HTML(html)

def display_query_and_results_video(query, urls, scores):
  """Build the HTML for one text query and its ranked video results.

  Videos are listed from highest to lowest score, one table row per video
  with its rank, score (two decimals) and an `<img>` tag for the URL.

  Args:
    query: Text query shown in the heading.
    urls: Sequence of video URLs, indexed consistently with `scores`.
    scores: One similarity score per URL (NumPy array or plain sequence).

  Returns:
    The HTML markup as a string. Both `<div>` wrappers are closed, so
    several results can be concatenated without nesting into each other.
  """
  scores = np.asarray(scores)
  # Negate so that argsort's ascending order yields highest score first.
  ranking = np.argsort(-scores)

  row_template = ('<tr><td>Rank #{}, Score:{:.2f}</td>'
                  '<td><img src="{}" height="224"></td></tr>')
  parts = [
      '<h2>Input query: <i>{}</i> </h2><div>'.format(query),
      'Results: <div>',
      '<table>',
  ]
  for rank, video_ix in enumerate(ranking, start=1):
    parts.append(row_template.format(rank, scores[video_ix], urls[video_ix]))
  # Close the table and the two <div> elements opened above.
  parts.append('</table></div></div>')
  return ''.join(parts)

In [13]:
# @title Örnek videoları yükleyin ve metin sorgularını tanımlayın { display-mode: "form" }

# URLs of the sample clips (animated GIFs), in display order.
all_videos_urls = [
    'https://upload.wikimedia.org/wikipedia/commons/b/b0/YosriAirTerjun.gif',
    'https://upload.wikimedia.org/wikipedia/commons/e/e6/Guitar_solo_gif.gif',
    'https://upload.wikimedia.org/wikipedia/commons/3/30/2009-08-16-autodrift-by-RalfR-gif-by-wau.gif',
    'https://upload.wikimedia.org/wikipedia/commons/thumb/7/7f/Rotating_earth_animated_transparent.gif/640px-Rotating_earth_animated_transparent.gif',
]
video_1_url, video_2_url, video_3_url, video_4_url = all_videos_urls

# Download and decode every clip with the default frame count and size.
all_videos = [load_video(url) for url in all_videos_urls]
video_1, video_2, video_3, video_4 = all_videos

# Text queries the clips will be ranked against.
all_queries_video = [
    'Waterfall',
    'Playing Guitar',
    'Drifting Car',
    'Rotating Earth',
]
query_1_video, query_2_video, query_3_video, query_4_video = all_queries_video

# Last expression of the cell: render the clips side by side.
display_video(all_videos_urls)


Video 1,Video 2,Video 3,Unnamed: 3
,,,


In [14]:
# Prepare the video input: stack the clips along a new leading (batch) axis.
videos_np = np.stack(all_videos)

# Prepare the text input: the queries as a NumPy array.
words_np = np.asarray(all_queries_video)

# Generate the video and text embedding vectors.
video_embd, text_embd = generate_embeddings(
    model=hub_model, input_frames=videos_np, input_words=words_np)

# Text-to-video similarity is the dot product of the embeddings; each row of
# `all_scores` holds one query's scores against every video.
all_scores = np.dot(text_embd, tf.transpose(video_embd))


In [15]:
# Display the results: one ranked table per query, separated by line breaks.
html_parts = []
for i, words in enumerate(words_np):
  html_parts.append(
      display_query_and_results_video(words, all_videos_urls, all_scores[i]))
  html_parts.append('<br>')
html = ''.join(html_parts)
display.HTML(html)


0,1
"Rank #1, Score:4.71",
"Rank #2, Score:-1.07",
"Rank #3, Score:-1.63",
"Rank #4, Score:-4.17",

0,1
"Rank #1, Score:6.50",
"Rank #2, Score:-1.79",
"Rank #3, Score:-2.67",
"Rank #4, Score:-3.32",

0,1
"Rank #1, Score:8.78",
"Rank #2, Score:-1.07",
"Rank #3, Score:-2.17",
"Rank #4, Score:-2.17",

0,1
"Rank #1, Score:7.74",
"Rank #2, Score:3.32",
"Rank #3, Score:-1.66",
"Rank #4, Score:-3.24",
