In [11]:
import pandas as pd

attention_data = pd.read_csv("attention_data_final.csv")
print(attention_data.shape)
attention_data.head()

(176081, 8)


Unnamed: 0,video id,Scene Number,Start Timecode,Start Time (seconds),End Timecode,End Time (seconds),Length (timecode),Length (seconds)
0,7231636207368929281,1,00:00:00.000,0.0,00:00:02.736,2.736,00:00:02.736,2.736
1,7231636207368929281,2,00:00:02.736,2.736,00:00:05.272,5.272,00:00:02.536,2.536
2,7231636207368929281,3,00:00:05.272,5.272,00:00:06.907,6.907,00:00:01.635,1.635
3,7231636207368929281,4,00:00:06.907,6.907,00:00:10.043,10.043,00:00:03.136,3.136
4,7241901987004579842,1,00:00:00.000,0.0,00:00:03.367,3.367,00:00:03.367,3.367


In [12]:
from tqdm import tqdm
import os

video_ids = attention_data['video id'].unique()

rows_to_delete = []

for video_id in tqdm(video_ids):
    video_scenes = attention_data[attention_data['video id'] == video_id]
    
    if len(video_scenes) == 1:
        file_path = f"scene_cuts/{video_id}/scene-001.jpg"
        if not os.path.exists(file_path):
            rows_to_delete.append(video_scenes.index[0])
    else:
        for index, scene in video_scenes.iterrows():
            scene_number = scene['Scene Number']
            file_path = f"scene_cuts/{video_id}/{video_id}-Scene-{scene_number:03d}-01.jpg"
            
            if not os.path.exists(file_path):
                rows_to_delete.append(index)

attention_data = attention_data.drop(rows_to_delete)
attention_data.to_csv('attention_data.csv', index=False)

100%|██████████| 18977/18977 [00:14<00:00, 1341.66it/s]


In [2]:
import pandas as pd

final_attention_data = pd.read_csv('attention_data.csv')
final_attention_data.shape

(176060, 8)

In [4]:
# INPUT: a folder that contains scenes splitted from an advertising video
# the name of the folder is the video id

### DESCRIPTION OF THE FOLLOWING CODES ###
# to estimate human attention within the scene
# Human attention is guided by meaning maps (semantic richness) stated by T.R. Henderson (2017)
# we aim to simulates their lab experiment procedure by
# 1) first separating the scene image into several patches

# output dir: for patch, cuts_patch/[video_id]
#             for metadata, metadata/[video_id]

import os
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw
from tqdm import tqdm

def create_circular_mask(h, w, center=None, radius=None):
    if center is None:  # use the middle of the image
        center = (int(w/2), int(h/2))
    if radius is None:  # use the smallest distance between the center and image walls
        radius = min(center[0], center[1], w-center[0], h-center[1])

    Y, X = np.ogrid[:h, :w]
    dist_from_center = np.sqrt((X - center[0])**2 + (Y - center[1])**2)

    mask = dist_from_center <= radius
    return mask

def extract_circular_patches(scene_name, image, degrees, overlap=0.1, save_dir_patch="raw/test", save_dir_meta="raw/test"):
    if not os.path.exists(save_dir_patch):
        os.makedirs(save_dir_patch)

    h, w, _ = image.shape
    patch_count = 0
    metadata = []
    for degree in degrees:
        radius = degree_to_pixel(degree, h, w)
        step = int(radius * (1 - overlap))
        for y in range(0, h, step):
            for x in range(0, w, step):
                mask = create_circular_mask(h, w, center=(x, y), radius=radius)
                patch = np.zeros_like(image)
                patch[mask] = image[mask]

                # Find the bounding box of the circular patch
                coords = np.argwhere(mask)
                y_min, x_min = coords.min(axis=0)
                y_max, x_max = coords.max(axis=0)

                # Crop the patch to the bounding box
                cropped_patch = patch[y_min:y_max+1, x_min:x_max+1]
                patch_image = Image.fromarray(cropped_patch)
                patch_filename = f'patch_{patch_count}_deg_{degree}.png'
                patch_image.save(os.path.join(save_dir_patch, patch_filename))

                # Save metadata for reconstruction
                metadata.append({
                    'filename': patch_filename,
                    'center': (x, y),
                    'radius': radius,
                    'bbox': (x_min, y_min, x_max, y_max)
                })

                patch_count += 1

    # Save metadata to a file
    np.save(os.path.join(save_dir_meta, f'{scene_name}_metadata.npy'), metadata)

def degree_to_pixel(degree, h, w):
    # Assuming the image represents a certain field of view, convert degrees to pixels
    # This is a placeholder function and should be adjusted based on actual FOV and image dimensions
    fov = 90  # Example field of view in degrees
    return int((degree / fov) * min(h, w))

def reconstruct_image(metadata_file, original_shape):
    metadata = np.load(metadata_file, allow_pickle=True)
    reconstructed_image = np.zeros(original_shape, dtype=np.uint8)

    for data in metadata:
        patch_image = Image.open(os.path.join(os.path.dirname(metadata_file), data['filename']))
        patch_array = np.array(patch_image)

        x_min, y_min, x_max, y_max = data['bbox']
        mask = create_circular_mask(original_shape[0], original_shape[1], center=data['center'], radius=data['radius'])
        mask_cropped = mask[y_min:y_max+1, x_min:x_max+1]

        reconstructed_image[y_min:y_max+1, x_min:x_max+1][mask_cropped] = patch_array[mask_cropped]

    return Image.fromarray(reconstructed_image)


attention_data = pd.read_csv('attention_data.csv')
video_ids = attention_data['video id'].unique()

for video_id in tqdm(video_ids):
    video_id = str(video_id)
    scenes_path = os.path.join("scene_cuts", video_id)
    for scene_filename in tqdm(os.listdir(scenes_path)):
        scene_path = os.path.join(scenes_path, scene_filename)
        scene_name, _ = os.path.splitext(scene_filename)

        # Load the scene image
        scene_image = Image.open(scene_path)
        scene_image_np = np.array(scene_image)
        
        output_patch_dir = os.path.join("cuts_patch", video_id)
        output_metadata_dir = os.path.join("cuts_metadata", video_id)
        
        if not os.path.exists(output_patch_dir):
            os.makedirs(output_patch_dir)
        if not os.path.exists(output_metadata_dir):
            os.makedirs(output_metadata_dir)

        # Define the output directories for patches and metadata
        scene_patch_dir = os.path.join(output_patch_dir, scene_name)
        metadata_dir = output_metadata_dir

        # Extract patches and save metadata
        extract_circular_patches(scene_name, scene_image_np, degrees=[3, 7],
                                 save_dir_patch=scene_patch_dir,
                                 save_dir_meta=metadata_dir)

  0%|          | 0/18977 [00:00<?, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:15<00:45, 15.29s/it][A
 50%|█████     | 2/4 [00:30<00:30, 15.21s/it][A
 75%|███████▌  | 3/4 [00:45<00:15, 15.18s/it][A
100%|██████████| 4/4 [01:00<00:00, 15.22s/it][A
  0%|          | 1/18977 [01:00<320:55:59, 60.89s/it]
  0%|          | 0/13 [00:00<?, ?it/s][A
  8%|▊         | 1/13 [00:17<03:29, 17.46s/it][A
 15%|█▌        | 2/13 [00:34<03:12, 17.46s/it][A
 23%|██▎       | 3/13 [00:52<02:55, 17.52s/it][A
 31%|███       | 4/13 [01:09<02:37, 17.48s/it][A
 38%|███▊      | 5/13 [01:27<02:20, 17.58s/it][A
 46%|████▌     | 6/13 [01:45<02:02, 17.53s/it][A
 54%|█████▍    | 7/13 [02:02<01:45, 17.54s/it][A
 62%|██████▏   | 8/13 [02:20<01:27, 17.50s/it][A
 69%|██████▉   | 9/13 [02:37<01:10, 17.55s/it][A
 77%|███████▋  | 10/13 [02:55<00:52, 17.46s/it][A
 85%|████████▍ | 11/13 [03:12<00:35, 17.52s/it][A
 92%|█████████▏| 12/13 [03:30<00:17, 17.50s/it][A
100%|██████████| 13/13

KeyboardInterrupt: 

In [6]:
# THIS IS THE VERSION WITH GPU ACCELERATION

import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import cupy as cp
import cv2

def create_circular_mask(h, w, center, radius):
    Y, X = cp.ogrid[:h, :w]
    dist_from_center = cp.sqrt((X - center[0])**2 + (Y - center[1])**2)
    mask = dist_from_center <= radius
    return mask

def extract_circular_patches(scene_name, image, degrees, overlap=0.1, save_dir_patch="raw/test", save_dir_meta="raw/test"):
    if not os.path.exists(save_dir_patch):
        os.makedirs(save_dir_patch)

    h, w, _ = image.shape
    patch_count = 0
    metadata = []

    # Move image to GPU
    image_gpu = cp.asarray(image)

    for degree in degrees:
        radius = degree_to_pixel(degree, h, w)
        step = int(radius * (1 - overlap))
        for y in range(0, h, step):
            for x in range(0, w, step):
                mask = create_circular_mask(h, w, (x, y), radius)
                patch = cp.zeros_like(image_gpu)
                patch[mask] = image_gpu[mask]

                # Find the bounding box of the circular patch
                coords = cp.argwhere(mask)
                y_min, x_min = coords.min(axis=0)
                y_max, x_max = coords.max(axis=0)

                # Crop the patch to the bounding box
                cropped_patch = patch[y_min:y_max+1, x_min:x_max+1]
                patch_image = Image.fromarray(cp.asnumpy(cropped_patch))
                patch_filename = f'patch_{patch_count}_deg_{degree}.png'
                patch_image.save(os.path.join(save_dir_patch, patch_filename))

                # Save metadata for reconstruction
                metadata.append({
                    'filename': patch_filename,
                    'center': (int(x), int(y)),
                    'radius': int(radius),
                    'bbox': (int(x_min), int(y_min), int(x_max), int(y_max))
                })

                patch_count += 1

    # Save metadata to a file
    np.save(os.path.join(save_dir_meta, f'{scene_name}_metadata.npy'), metadata)

def degree_to_pixel(degree, h, w):
    fov = 90  # Example field of view in degrees
    return int((degree / fov) * min(h, w))

def reconstruct_image(metadata_file, original_shape):
    metadata = np.load(metadata_file, allow_pickle=True)
    reconstructed_image = cp.zeros(original_shape, dtype=cp.uint8)

    for data in metadata:
        patch_image = cv2.imread(os.path.join(os.path.dirname(metadata_file), data['filename']))
        patch_image = cv2.cvtColor(patch_image, cv2.COLOR_BGR2RGB)
        patch_array = cp.asarray(patch_image)

        x_min, y_min, x_max, y_max = data['bbox']
        mask = create_circular_mask(original_shape[0], original_shape[1], data['center'], data['radius'])
        mask_cropped = mask[y_min:y_max+1, x_min:x_max+1]

        reconstructed_image[y_min:y_max+1, x_min:x_max+1][mask_cropped] = patch_array[mask_cropped]

    return Image.fromarray(cp.asnumpy(reconstructed_image))

# Main execution
attention_data = pd.read_csv('attention_data.csv')
video_ids = attention_data['video id'].unique()

for video_id in tqdm(video_ids):
    video_id = str(video_id)
    scenes_path = os.path.join("scene_cuts", video_id)
    for scene_filename in tqdm(os.listdir(scenes_path)):
        scene_path = os.path.join(scenes_path, scene_filename)
        scene_name, _ = os.path.splitext(scene_filename)

        # Load the scene image using OpenCV for faster loading
        scene_image_np = cv2.imread(scene_path)
        scene_image_np = cv2.cvtColor(scene_image_np, cv2.COLOR_BGR2RGB)
        
        output_patch_dir = os.path.join("cuts_patch", video_id)
        output_metadata_dir = os.path.join("cuts_metadata", video_id)
        
        if not os.path.exists(output_patch_dir):
            os.makedirs(output_patch_dir)
        if not os.path.exists(output_metadata_dir):
            os.makedirs(output_metadata_dir)

        # Define the output directories for patches and metadata
        scene_patch_dir = os.path.join(output_patch_dir, scene_name)
        metadata_dir = output_metadata_dir

        # Extract patches and save metadata
        extract_circular_patches(scene_name, scene_image_np, degrees=[3, 7],
                                 save_dir_patch=scene_patch_dir,
                                 save_dir_meta=metadata_dir)

  0%|          | 0/18977 [00:00<?, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:07<00:23,  7.84s/it][A
 50%|█████     | 2/4 [00:11<00:11,  5.65s/it][A
 75%|███████▌  | 3/4 [00:16<00:04,  4.98s/it][A
100%|██████████| 4/4 [00:20<00:00,  5.08s/it][A
  0%|          | 1/18977 [00:20<107:10:41, 20.33s/it]
  0%|          | 0/13 [00:00<?, ?it/s][A
  8%|▊         | 1/13 [00:03<00:47,  3.96s/it][A
 15%|█▌        | 2/13 [00:08<00:45,  4.13s/it][A
 23%|██▎       | 3/13 [00:12<00:41,  4.17s/it][A
 31%|███       | 4/13 [00:16<00:37,  4.20s/it][A
 38%|███▊      | 5/13 [00:21<00:33,  4.25s/it][A
 46%|████▌     | 6/13 [00:25<00:29,  4.24s/it][A
 54%|█████▍    | 7/13 [00:29<00:25,  4.22s/it][A
 62%|██████▏   | 8/13 [00:33<00:21,  4.22s/it][A
 69%|██████▉   | 9/13 [00:41<00:18,  4.64s/it][A
  0%|          | 1/18977 [01:02<327:25:10, 62.12s/it]


KeyboardInterrupt: 

In [None]:
# 3RD VERSION: make further use of GPU

import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import cupy as cp
import cv2
import logging
from datetime import datetime

def create_circular_masks(h, w, centers, radii):
    Y, X = cp.ogrid[:h, :w]
    centers = cp.asarray(centers)
    radii = cp.asarray(radii)
    
    X = X[:, :, cp.newaxis] - centers[:, 0]
    Y = Y[:, :, cp.newaxis] - centers[:, 1]
    
    dist_from_centers = cp.sqrt(X**2 + Y**2)
    masks = dist_from_centers <= radii
    return masks

def extract_circular_patches(scene_name, image, degrees, overlap=0.1, save_dir_patch="raw/test", save_dir_meta="raw/test", batch_size=100):
    if not os.path.exists(save_dir_patch):
        os.makedirs(save_dir_patch)

    h, w, _ = image.shape
    metadata = []

    # Move image to GPU
    image_gpu = cp.asarray(image)

    for degree in degrees:
        radius = degree_to_pixel(degree, h, w)
        step = int(radius * (1 - overlap))
        
        centers = [(x, y) for y in range(0, h, step) for x in range(0, w, step)]
        radii = [radius] * len(centers)
        
        # Process in batches
        for i in range(0, len(centers), batch_size):
            batch_centers = centers[i:i+batch_size]
            batch_radii = radii[i:i+batch_size]
            
            masks = create_circular_masks(h, w, batch_centers, batch_radii)
            
            for j, (center, mask) in enumerate(zip(batch_centers, masks.transpose(2, 0, 1))):
                patch = cp.zeros_like(image_gpu)
                patch[mask] = image_gpu[mask]

                coords = cp.argwhere(mask)
                y_min, x_min = coords.min(axis=0)
                y_max, x_max = coords.max(axis=0)

                cropped_patch = patch[y_min:y_max+1, x_min:x_max+1]
                patch_image = Image.fromarray(cp.asnumpy(cropped_patch))
                patch_filename = f'patch_{i+j}_deg_{degree}.png'
                patch_image.save(os.path.join(save_dir_patch, patch_filename))

                metadata.append({
                    'filename': patch_filename,
                    'center': center,
                    'radius': int(radius),
                    'bbox': (int(x_min), int(y_min), int(x_max), int(y_max))
                })
            
            # Clear GPU memory
            cp.get_default_memory_pool().free_all_blocks()

    # Save metadata to a file
    np.save(os.path.join(save_dir_meta, f'{scene_name}_metadata.npy'), metadata)

def degree_to_pixel(degree, h, w):
    fov = 90  # Example field of view in degrees
    return int((degree / fov) * min(h, w))

def reconstruct_image(metadata_file, original_shape):
    metadata = np.load(metadata_file, allow_pickle=True)
    reconstructed_image = cp.zeros(original_shape, dtype=cp.uint8)

    centers = [data['center'] for data in metadata]
    radii = [data['radius'] for data in metadata]
    masks = create_circular_masks(original_shape[0], original_shape[1], centers, radii)

    for i, data in enumerate(metadata):
        patch_image = cv2.imread(os.path.join(os.path.dirname(metadata_file), data['filename']))
        patch_image = cv2.cvtColor(patch_image, cv2.COLOR_BGR2RGB)
        patch_array = cp.asarray(patch_image)

        x_min, y_min, x_max, y_max = data['bbox']
        mask_cropped = masks[y_min:y_max+1, x_min:x_max+1, i]

        reconstructed_image[y_min:y_max+1, x_min:x_max+1][mask_cropped] = patch_array[mask_cropped]

    return Image.fromarray(cp.asnumpy(reconstructed_image))


# Set up logging
log_dir = "logs"
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

log_file = os.path.join(log_dir, f"processing_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
logging.basicConfig(filename=log_file, level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Redirect tqdm output to log file
class TqdmToLogger(object):
    def __init__(self, logger, level=logging.INFO):
        self.logger = logger
        self.level = level
        self.last_msg = ''

    def write(self, buf):
        self.last_msg = buf.strip()

    def flush(self):
        self.logger.log(self.level, self.last_msg)

tqdm_logger = TqdmToLogger(logging.getLogger())

# Main execution
logging.info("Starting processing")
attention_data = pd.read_csv('attention_data.csv')
video_ids = attention_data['video id'].unique()

for video_id in tqdm(video_ids[3162:], desc="Processing videos", file=tqdm_logger):
    video_id = str(video_id)
    scenes_path = os.path.join("scene_cuts", video_id)
    output_patch_dir = os.path.join("cuts_patch", video_id)
    output_metadata_dir = os.path.join("cuts_metadata", video_id)
    
    if not os.path.exists(output_patch_dir):
        os.makedirs(output_patch_dir)
    if not os.path.exists(output_metadata_dir):
        os.makedirs(output_metadata_dir)

    # Get list of already processed scenes
    processed_scenes = set(os.path.splitext(f)[0] for f in os.listdir(output_metadata_dir) if f.endswith('_metadata.npy'))

    for scene_filename in tqdm(os.listdir(scenes_path), desc=f"Processing scenes for video {video_id}", leave=False, file=tqdm_logger):
        scene_name, _ = os.path.splitext(scene_filename)
        
        # Skip if this scene has already been processed
        if scene_name in processed_scenes:
            logging.info(f"Skipping already processed scene: {scene_name}")
            continue

        scene_path = os.path.join(scenes_path, scene_filename)

        # Load the scene image using OpenCV for faster loading
        scene_image_np = cv2.imread(scene_path)
        if scene_image_np is None:
            logging.error(f"Failed to load image: {scene_path}")
            continue
        scene_image_np = cv2.cvtColor(scene_image_np, cv2.COLOR_BGR2RGB)

        # Define the output directories for patches and metadata
        scene_patch_dir = os.path.join(output_patch_dir, scene_name)
        metadata_dir = output_metadata_dir

        try:
            # Extract patches and save metadata
            extract_circular_patches(scene_name, scene_image_np, degrees=[3, 7],
                                     save_dir_patch=scene_patch_dir,
                                     save_dir_meta=metadata_dir,
                                     batch_size=100)  # Adjust batch_size as needed
            logging.info(f"Successfully processed scene: {scene_name}")
        except Exception as e:
            logging.error(f"Error processing scene {scene_name}: {str(e)}")
            continue

    # Clear GPU memory after processing each video
    cp.get_default_memory_pool().free_all_blocks()

logging.info("Processing completed.")

# Print the location of the log file
print(f"Processing complete. Log file is located at: {log_file}")

In [8]:
import os

data = os.listdir("cuts_patch")
print(data[0])

6930567333828820993


In [None]:
# 2) then, store the patch information to .csv file
# output dir: patch_info_dir

import os
import csv
import numpy as np
from PIL import Image, ImageDraw
from tqdm import tqdm

def calculate_patch_scores_for_scenes(patch_data_folder, metadata_folder, output_csv):
    # Prepare CSV file
    with open(output_csv, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['scene', 'filename', 'center_x', 'center_y', 'radius', 'bbox_x_min', 'bbox_y_min', 'bbox_x_max', 'bbox_y_max', 'mean_score'])

        # Iterate through each scene
        for scene_name in os.listdir(patch_data_folder):
            scene_folder = os.path.join(patch_data_folder, scene_name)
            metadata_file = os.path.join(metadata_folder, f"{scene_name}_metadata.npy")

            if not os.path.exists(metadata_file):
                print(f"Metadata file for scene {scene_name} not found, skipping.")
                continue

            # Load metadata
            metadata = np.load(metadata_file, allow_pickle=True)

            # Iterate through each patch
            for data in metadata:
                patch_image = Image.open(os.path.join(scene_folder, data['filename'])).convert('L')
                patch_array = np.array(patch_image)

                # Write data to CSV
                writer.writerow([
                    scene_name,
                    data['filename'],
                    data['center'][0], data['center'][1],
                    data['radius'],
                    data['bbox'][0], data['bbox'][1], data['bbox'][2], data['bbox'][3],
                ])

                
cuts_patch_folder = 'cuts_patch'
cuts_metadata_folder = 'cuts_metadata'


cuts_patch_files = os.listdir(cuts_patch_folder)
for video_id in tqdm(cuts_patch_files[:500], desc="Processing videos"):
    patch_data_folder = os.path.join(cuts_patch_folder, video_id)
    metadata_folder = os.path.join(cuts_metadata_folder, video_id)
    output_csv = f'patch_info_dir/patch_info_{video_id}.csv'
    calculate_patch_scores_for_scenes(patch_data_folder, metadata_folder, output_csv)

Processing videos:  75%|███████▌  | 377/500 [44:28<11:11,  5.46s/it]  