In [1]:
from utils import dataset_manager, model_manager
import os
import fiftyone as fo
from flask import Flask, request, jsonify
from routes.interact_with_csv_files import csv_routes
from deep_translator import GoogleTranslator
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Data root. Set the AIC_DATA_DIR environment variable to point at your own copy of the data
# (organized as described in the GitHub repo, e.g. os.path.join("..", "data")); when it is
# unset, the original author's local path is used as before.
data_dir = os.environ.get('AIC_DATA_DIR', '/Users/VoThinhPhat/Desktop/data')

# The name `dataset_manager` is deliberately rebound below from the imported module to a
# Dataset instance, because later cells call methods on it. Resolving the module through
# the `utils` package (instead of through the rebound name) keeps this cell re-runnable:
# a second run would otherwise fail with AttributeError on `<Dataset>.Dataset`.
import utils

dataset_manager = utils.dataset_manager.Dataset(dataset_name='AIC_2024',
                                                data_dir=data_dir)
dataset_manager.load_metadata()
dataset = dataset_manager.get_fiftyone_dataset()

 100% |█████████████| 25731/25731 [2.1s elapsed, 0s remaining, 12.1K samples/s]      

1. Load video name and keyframe_id...
	L01_V031 with keyframe 0995 -- Finish
2. Set up frame idx
	L01_V024 is ready...
3. Set up clip_14_dict and task_former_dict
	L01_V024 is ready...
4. Load frame_id, clip-14, task-former
	L01_V031 is done...

In [3]:
# CLIP ViT-Large/patch14 model wrapper.
model_clip14 = model_manager.CLIP_14_model()

# TASK-former needs two files that are not shipped with the repo: the model config and the
# trained checkpoint (contact vtphatt2 for the download link). Both are looked up under
# ./task-former relative to the current working directory.
model_config_file = os.path.join(
    os.getcwd(),
    *['task-former', 'code', 'training', 'model_configs', 'ViT-B-16.json'],
)
model_file = os.path.join(
    os.getcwd(),
    *['task-former', 'model', 'tsbir_model_final.pt'],
)
model_task_former = model_manager.TASK_former_model(
    model_file=model_file,
    model_config_file=model_config_file,
)

Model openai-clip-vit-large-patch14 is using mps




Model TASK-former is using mps


  checkpoint = torch.load(model_file, map_location=loc)


In [4]:
def searchByText(text_query, k = 200, max_keyframe_gap = 8):
    """Rank keyframes against a text query and group neighbouring hits of the same video.

    The query is embedded with ``model_clip14.inference`` and compared (cosine similarity)
    with the embeddings returned by ``dataset_manager.get_image_clip14_embeddings()``.
    The ``k`` best-scoring samples are then walked in rank order; each not-yet-grouped hit
    starts a new group and absorbs every later hit that belongs to the same video and whose
    ``keyframe_id`` differs from the group's first hit by less than ``max_keyframe_gap``.

    Args:
        text_query: Query text passed to the CLIP wrapper (presumably English; the HTTP
            route translates before calling — confirm for other callers).
        k: Maximum number of top-ranked samples to consider. Values larger than the number
            of available embeddings are clamped; ``k <= 0`` yields an empty list.
        max_keyframe_gap: Exclusive upper bound on the keyframe-id distance to the group's
            first hit for a sample to join that group.

    Returns:
        A list of ``[video_name, [(filepath, frame_id), ...]]`` entries. Groups appear in
        order of their best-ranked hit; frames inside a group are sorted by ``frame_id``.
    """
    similarities = cosine_similarity([model_clip14.inference(text_query)],
                                     dataset_manager.get_image_clip14_embeddings())[0]

    # argsort()[-k:] silently returns fewer than k indices when the dataset is smaller
    # than k, so every later use of k must be bounded by what is actually available.
    k = min(k, len(similarities))
    if k <= 0:
        return []
    top_k_indices = similarities.argsort()[-k:][::-1]

    # Fetch each ranked sample once instead of on every pairwise comparison.
    image_samples = dataset_manager.get_image_samples()
    hits = [image_samples[index] for index in top_k_indices]

    submission_list = []
    visited = [False] * k
    for i, seed in enumerate(hits):
        if visited[i]:
            continue
        visited[i] = True
        video_name = seed['video']
        frames = [(seed['filepath'], int(seed['frame_id']))]
        for j in range(i + 1, k):
            if visited[j]:
                continue
            candidate = hits[j]
            # Distance is measured against the group's first (best-ranked) hit, not
            # against the most recently added frame.
            if (candidate['video'] == video_name
                    and abs(int(seed['keyframe_id']) - int(candidate['keyframe_id'])) < max_keyframe_gap):
                frames.append((candidate['filepath'], int(candidate['frame_id'])))
                visited[j] = True
        frames.sort(key=lambda frame: frame[1])
        submission_list.append([video_name, frames])
    return submission_list

In [5]:
# Flask application object for this notebook; the route below is attached to it.
app = Flask(__name__)

# Register the blueprint with the main app
# (csv_routes is imported from routes.interact_with_csv_files).
app.register_blueprint(csv_routes)

@app.route('/search_by_text', methods=['POST'])
def search_by_text():
    """Handle POST /search_by_text: translate the query to English and run the text search.

    Expects a JSON object body with a string field ``searchText``. The text is translated
    from Vietnamese ('vi') to English ('en') with GoogleTranslator and passed to
    ``searchByText`` with ``k=100``.

    Returns:
        200 with JSON ``{"translated_text": str, "submission_list": list}`` on success, or
        400 with JSON ``{"error": str}`` when the body is not a JSON object or
        ``searchText`` is missing / not a string. Successful responses carry headers that
        disable client-side caching.
    """
    # silent=True yields None for a missing or malformed JSON body instead of raising,
    # so the request can be rejected with a clear 400 rather than crashing on `.get`.
    data = request.get_json(silent=True)
    search_text = data.get('searchText') if isinstance(data, dict) else None
    if not isinstance(search_text, str):
        return jsonify({
            "error": "Request body must be a JSON object with a string 'searchText' field."
        }), 400

    translated_text = GoogleTranslator(source='vi', target='en').translate(search_text)
    # searchByText returns a list ordered by rank, so ordering survives JSON serialization.
    submission_list = searchByText(translated_text, k=100)

    response = jsonify({
        "translated_text": translated_text,
        "submission_list": submission_list
    })
    # Results depend on the query, so tell browsers and proxies never to reuse a response.
    response.headers['Cache-Control'] = 'no-store, no-cache, must-revalidate, max-age=0'
    response.headers['Pragma'] = 'no-cache'
    response.headers['Expires'] = '0'

    return response, 200

# Start the Flask development server from this cell.
# debug=True turns on Flask debug mode (the log below reports "Debug mode: on"); keep this
# for local use only. use_reloader=False disables the auto-reloader.
# NOTE(review): presumably the reloader is disabled because it would re-execute the process
# and does not work from a notebook kernel — confirm.
app.run(debug=True, use_reloader=False)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [25/Sep/2024 16:36:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "GET /static/reload.js HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "[36mGET /static/zoom.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "[36mGET /static/canvasDrawing.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "[36mGET /static/styles.css HTTP/1.1[0m" 304 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "[36mGET /static/interact_with_csv_files.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "GET /static/search_mode.js HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "[36mGET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V005/0327.jpg HTTP/1.1[0m" 304 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "[36mGET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V005/0323.jpg HTTP/1.1[0m" 304 -
127.0.0.1 - - [25/Sep/2024 16:36:57] "[36mGET

100


127.0.0.1 - - [25/Sep/2024 16:38:15] "GET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V014/0449.jpg HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:38:15] "GET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V014/0453.jpg HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:38:15] "GET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V013/0269.jpg HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:38:15] "GET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V013/0270.jpg HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:38:15] "GET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V013/0271.jpg HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:38:15] "GET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V029/0083.jpg HTTP/1.1" 200 -
127.0.0.1 - - [25/Sep/2024 16:38:15] "GET /image/Users/VoThinhPhat/Desktop/data/batch1/keyframes/keyframes_L01/L01_V029/0084.jpg H