In [3]:
import re
import unicodedata
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
import nltk

# Ensure you have downloaded the wordnet corpus from NLTK
nltk.download('wordnet')
nltk.download('omw-1.4')

def normalize_text(text):
    # Lowercase the text
    text = text.lower()
    
    # Remove punctuation (keep only letters and numbers)
    text = re.sub(r'[^\w\s]', '', text)
    
    # Trim leading, trailing, and multiple whitespaces
    text = re.sub(r'\s+', ' ', text).strip()
    
    # Normalize using NFKD (decompose combined graphemes)
    text = unicodedata.normalize('NFKD', text)
    
    # Strip accents
    text = ''.join([c for c in text if not unicodedata.combining(c)])
    
    # Lemmatize the text
    lemmatizer = WordNetLemmatizer()
    words = text.split()
    lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
    
    # Join the lemmatized words back into a sentence
    normalized_text = ' '.join(lemmatized_words)
    
    return normalized_text

# Example usage
sample_text = "Màlaga is a beautiful city! Noël walked quickly to catch the bus."
normalized_text = normalize_text(sample_text)


[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/yonglxie/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/yonglxie/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [6]:
print(sample_text, '\n', normalized_text)

Màlaga is a beautiful city! Noël walked quickly to catch the bus. 
 malaga is a beautiful city noel walked quickly to catch the bus


In [1]:
import os
import torch
import clip
import cv2
from PIL import Image

# Load the CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Function to extract frames from video
def extract_frames(video_path, frame_rate=1):
    video = cv2.VideoCapture(video_path)
    frames = []
    count = 0
    while True:
        ret, frame = video.read()
        if not ret:
            break
        if count % frame_rate == 0:  # Extract 1 frame per second
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # Convert to RGB
        count += 1
    video.release()
    return frames

# Function to get image embeddings from CLIP
def get_image_embeddings(frames):
    embeddings = []
    for frame in frames:
        image = Image.fromarray(frame)
        image = preprocess(image).unsqueeze(0).to(device)
        with torch.no_grad():
            image_features = model.encode_image(image)
            embeddings.append(image_features)
    return torch.cat(embeddings)

# Function to search videos based on text query
def search_videos(query, video_paths, frame_rate=1):
    # Get text embedding
    text = clip.tokenize([query]).to(device)
    with torch.no_grad():
        text_embedding = model.encode_text(text)

    # Iterate over each video
    results = []
    for video_path in video_paths:
        frames = extract_frames(video_path, frame_rate)
        video_embeddings = get_image_embeddings(frames)
        
        # Compute cosine similarity between text and video frames
        similarities = torch.nn.functional.cosine_similarity(text_embedding, video_embeddings)
        max_similarity = similarities.max().item()

        # Store the max similarity score and the corresponding video path
        results.append((max_similarity, video_path))
    
    # Sort videos by similarity
    results.sort(reverse=True, key=lambda x: x[0])
    return results

# Example usage:
video_dir = "/Users/yonglxie/Downloads"
video_files = [os.path.join(video_dir, f) for f in os.listdir(video_dir) if f.endswith('.mp4')]

# Text query to search
query = "a person riding a horse"

# Perform search
search_results = search_videos(query, video_files)

# Output the sorted video files based on similarity
for similarity, video_path in search_results:
    print(f"Video: {video_path}, Similarity: {similarity}")

Video: /Users/yonglxie/Downloads/arab.mp4, Similarity: 0.2508220672607422
Video: /Users/yonglxie/Downloads/金融行业智能化全渠道联络中心ConnectNow联合解决方案——多语言翻译机总结.mp4, Similarity: 0.20997147262096405
