#### 무변환 패치 자르기

In [None]:
import torch
import numpy as np
from PIL import Image

def extract_sliding_patches(
    pil_img: Image.Image,
    fov: float,
    overlap_ratio: float
) -> torch.Tensor:
    """Cut an equirectangular panorama into overlapping patches, without reprojection.

    A fixed-size window slides over the image. Horizontally the window wraps
    around the right edge onto the left edge, so the patches cover the whole
    width. Vertically the window never starts above ``ceil(H / 4)`` and never
    ends below ``H - floor(H / 4)``, i.e. the top and bottom quarters of the
    panorama are skipped.

    Args:
        pil_img (PIL.Image.Image): Input equirectangular panorama. Anything
            ``np.array`` converts to an (H, W, C) array works; a 2-D (H, W)
            grayscale array is treated as a single channel.
        fov (float): Horizontal field of view of one patch in degrees,
            in the range (0, 360]. Example: 90.0.
        overlap_ratio (float): Overlap between neighbouring patches
            (0.0 ~ 1.0). The stride is ``patch * (1 - overlap_ratio)``,
            clamped to at least one pixel.

    Returns:
        torch.Tensor: Float tensor of shape (N, C, patch_h, patch_w) holding
        the pixel values divided by 255, where ``patch_h = H // 2`` and
        ``patch_w = int(W * fov / 360)``. N is 0 when no window fits.

    Raises:
        ValueError: If ``fov`` is not in (0, 360]. Such values used to
            produce patches of inconsistent width (and an opaque stacking
            error) or empty patches.
    """
    # A patch wider than the panorama cannot be wrapped consistently, and a
    # non-positive FOV has no meaningful width, so reject both up front.
    if not 0.0 < fov <= 360.0:
        raise ValueError(f"fov must be in (0, 360] degrees, got {fov!r}")

    # 1) Image -> float tensor (C, H2, W4), values divided by 255.
    img_arr = np.array(pil_img)
    if img_arr.ndim == 2:
        # Grayscale input: add the missing channel axis.
        img_arr = img_arr[:, :, None]
    img_tensor = torch.from_numpy(img_arr).permute(2, 0, 1).float() / 255.0
    C, H2, W4 = img_tensor.shape

    # 2) Patch size: half the image height, and the FOV's share of the width.
    patch_h = H2 // 2
    patch_w = int(W4 * (fov / 360.0))

    # 3) Strides derived from the overlap ratio (never smaller than 1 px).
    stride_h = max(int(patch_h * (1.0 - overlap_ratio)), 1)
    stride_w = max(int(patch_w * (1.0 - overlap_ratio)), 1)

    # 4) Allowed window origins vertically: skip the top ceil(H2/4) rows and
    #    make sure the window ends before the bottom floor(H2/4) rows.
    y_min = -(-H2 // 4)  # ceil(H2 / 4) in integer arithmetic
    y_max = H2 - H2 // 4 - patch_h

    patches = []
    # range() is empty when y_max < y_min, so no explicit guard is needed.
    for y in range(y_min, y_max + 1, stride_h):
        band = img_tensor[:, y : y + patch_h, :]
        for x in range(0, W4, stride_w):
            overflow = x + patch_w - W4
            if overflow <= 0:
                # Window lies completely inside the image.
                patch = band[:, :, x : x + patch_w]
            else:
                # Window crosses the right edge: join the right remainder
                # with the first `overflow` columns from the left edge.
                patch = torch.cat([band[:, :, x:], band[:, :, :overflow]], dim=2)
            patches.append(patch)

    # 5) Stack into (N, C, patch_h, patch_w); empty tensor if nothing fit.
    if not patches:
        return torch.zeros((0, C, patch_h, patch_w))
    return torch.stack(patches, dim=0)

In [None]:
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import requests
from src.utils import visualize_tensor_batch
from train_vlm import QuIC360Dataset
from transformers import AutoProcessor, AutoTokenizer
import torch
import numpy as np

# Load the DINOv2 image processor and the Gemma tokenizer used by the dataset.
processor = AutoProcessor.from_pretrained("facebook/dinov2-small")
print(processor)
# NOTE: the variable name `tokenzier` (sic) is kept as-is so other cells that
# may read it from the notebook namespace keep working.
tokenzier = AutoTokenizer.from_pretrained("google/gemma-3-4b-it")

# Load one panorama, resize it to 896x448 and convert it to a
# (1, C, H2, W4) float tensor with values in [0, 1].
img = Image.open("data/quic360/downtest/images/540231919_58d07745aa_o.jpg").convert("RGB")
img = img.resize((224*4, 224*2), Image.BILINEAR)
img_tensor = torch.from_numpy(np.array(img)).permute(2, 0, 1).float() / 255.0
img_tensor = img_tensor.unsqueeze(0)  # (1, C, H2, W4)

dataset = QuIC360Dataset(
    csv_file="data/quic360/downtest.csv",
    image_processor= processor,
    tokenizer=tokenzier,
    image_size=[224, 224],
    max_length=128,
    do_crop=True,
    fov=90,
    overlap_ratio=0.5,
)

# Crop the panorama with the dataset's own cropping routine and show the result.
imgs = dataset.crop_equirectangular_tensor(img_tensor)
visualize_tensor_batch(imgs)

# NOTE(review): if crop_equirectangular_tensor keeps the [0, 1] value range of
# its input, the processor's default rescale step divides by 255 a second
# time; confirm and pass do_rescale=False if so.
sample = processor(images=imgs, return_tensors="pt")
model = AutoModel.from_pretrained('facebook/dinov2-small',cache_dir='./.cache')

# Inference only: skip autograd bookkeeping instead of building a graph that
# is discarded right afterwards.
with torch.no_grad():
    outputs = model(sample['pixel_values'])
last_hidden_states = outputs.last_hidden_state
last_hidden_states = last_hidden_states.cpu().detach().numpy()

### 무변환 크롭 특징 추출

In [None]:
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import requests
# from train import QuIC360Dataset
from transformers import AutoProcessor, AutoTokenizer
from src.utils import visualize_tensor_batch

# Load the DINOv2 image processor and backbone.
processor = AutoProcessor.from_pretrained("facebook/dinov2-small")
model = AutoModel.from_pretrained('facebook/dinov2-small',cache_dir='./.cache')
img = Image.open("data/quic360/downtest/images/540231919_58d07745aa_o.jpg").convert("RGB")

# Cut the panorama into 90-degree sliding-window patches with 50 % overlap.
imgs = extract_sliding_patches(
    pil_img=img,
    fov=90.0,
    overlap_ratio=0.5
)
visualize_tensor_batch(imgs)

# extract_sliding_patches already returns floats in [0, 1]; the processor's
# default rescale step would divide by 255 a second time, so disable it.
sample = processor(
    images=imgs,
    return_tensors="pt",
    do_rescale=False,
)

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(sample['pixel_values'])
last_hidden_states = outputs.last_hidden_state
last_hidden_states = last_hidden_states.cpu().detach().numpy()

### 큐브맵 변환 특징 추출

In [None]:
import torch
import torch.nn.functional as F
import numpy as np
from PIL import Image

def extract_edge_patches_from_panorama(
    pil_img: Image.Image,
    patch_size: int
) -> torch.Tensor:
    """Extract the eight patches that straddle the vertical seams of a cubemap.

    The equirectangular panorama is resampled into the four side faces of a
    cube (front, right, back, left; the top and bottom faces are skipped).
    From every face, one square patch is taken at its left edge and one at its
    right edge, both vertically centred, so each of the four seams between
    neighbouring faces is covered by a pair of patches.

    Orientation: the front face is centred on the panorama centre
    (longitude 0) and longitude grows from left to right inside every face,
    matching the left-to-right direction of the panorama.

    Args:
        pil_img (PIL.Image.Image): Input equirectangular panorama. Anything
            ``np.array`` converts to an (H, W, C) array works; a 2-D (H, W)
            grayscale array is treated as a single channel.
        patch_size (int): Side length of the square patches in pixels. Must
            satisfy ``patch_size <= face_size`` where ``face_size = H // 2``.

    Returns:
        torch.Tensor: Tensor of shape (8, C, patch_size, patch_size), in this
        order:
            1. front, right strip (seam with right)
            2. right, left strip  (seam with front)
            3. right, right strip (seam with back)
            4. back,  left strip  (seam with right)
            5. back,  right strip (seam with left)
            6. left,  left strip  (seam with back)
            7. left,  right strip (seam with front)
            8. front, left strip  (seam with left)

    Raises:
        AssertionError: If ``patch_size`` exceeds ``face_size``. Raised
            explicitly so the check also runs under ``python -O``.
    """
    # --- 1) Image -> float tensor (C, H, W), values divided by 255 ---
    img_arr = np.array(pil_img)
    if img_arr.ndim == 2:
        # Grayscale input: add the missing channel axis.
        img_arr = img_arr[:, :, None]
    img_tensor = torch.from_numpy(img_arr).permute(2, 0, 1).float() / 255.0
    H = img_tensor.shape[1]

    # --- 2) Cube face size (faces are square) ---
    face_size = H // 2
    if patch_size > face_size:
        raise AssertionError("patch_size는 face 크기(face_size)보다 작거나 같아야 합니다.")

    # --- 3) Add a batch axis: (1, C, H, W) ---
    eq_tensor = img_tensor.unsqueeze(0)

    # --- 4) Per-face (u, v) grid; u grows to the right, v grows upwards ---
    lin = torch.linspace(-1.0, 1.0, face_size, device=eq_tensor.device)
    u_grid, v_grid = torch.meshgrid(lin, -lin, indexing="xy")
    ones = torch.ones_like(u_grid)

    # --- 5) Viewing directions (x, y, z) for [front, right, back, left] ---
    face_dirs = (
        (u_grid, v_grid, ones),     # +Z front
        (ones, v_grid, -u_grid),    # +X right
        (-u_grid, v_grid, -ones),   # -Z back
        (-ones, v_grid, u_grid),    # -X left
    )
    dirs = torch.stack([torch.stack(d, dim=-1) for d in face_dirs], dim=0)
    dirs = dirs / torch.linalg.norm(dirs, dim=-1, keepdim=True)
    # dirs: (4, face_size, face_size, 3), unit length

    # --- 6) Directions -> spherical coordinates ---
    x, y, z = dirs.unbind(dim=-1)
    # Longitude is measured from +Z (front) towards +X (right). The previous
    # atan2(z, x) measured it the other way round, which mirrored every face
    # horizontally and moved "front" away from the panorama centre.
    lon = torch.atan2(x, z)    # (4, face_size, face_size), in [-pi, pi]
    lat = torch.asin(y)        # (4, face_size, face_size), in [-pi/2, pi/2]

    # --- 7) Spherical coordinates -> grid_sample coordinates in [-1, 1] ---
    # lon = -pi..pi maps to the left..right image border and lat = +pi/2 to
    # the top row; this equals the former pixel-space round trip
    # ((lon / 2pi + 0.5) * (W - 1), then renormalised) without the detour.
    grid = torch.stack([lon / np.pi, -2.0 * lat / np.pi], dim=-1)

    # --- 8) Sample the four faces: (4, C, face_size, face_size) ---
    cube4 = F.grid_sample(
        eq_tensor.repeat(4, 1, 1, 1),
        grid,
        mode="bilinear",
        padding_mode="border",
        align_corners=True
    )

    # --- 9) Cut the vertically centred left/right strips of every face ---
    y_start = face_size // 2 - patch_size // 2
    rows = slice(y_start, y_start + patch_size)
    left_cols = slice(0, patch_size)
    right_cols = slice(face_size - patch_size, face_size)

    per_face = []
    for face in cube4:  # front, right, back, left
        per_face.append(face[:, rows, left_cols])
        per_face.append(face[:, rows, right_cols])

    # Rotate by one so the list starts with "front right" and ends with
    # "front left", pairing the two patches of each seam next to each other.
    patches = per_face[1:] + per_face[:1]

    return torch.stack(patches, dim=0)  # (8, C, patch_size, patch_size)

In [None]:
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import requests
# from train import QuIC360Dataset
from transformers import AutoProcessor, AutoTokenizer



# Load the DINOv2 image processor and backbone.
processor = AutoProcessor.from_pretrained("facebook/dinov2-small")
model = AutoModel.from_pretrained('facebook/dinov2-small',cache_dir='./.cache')
img = Image.open("data/quic360/downtest/images/540231919_58d07745aa_o.jpg").convert("RGB")

# NOTE(review): this cell sits in the cubemap section but repeats the
# sliding-window extraction - confirm whether
# extract_edge_patches_from_panorama was intended here.
imgs = extract_sliding_patches(
    pil_img=img,
    fov=90.0,
    overlap_ratio=0.5
)

# extract_sliding_patches already returns floats in [0, 1]; the processor's
# default rescale step would divide by 255 a second time, so disable it.
sample = processor(
    images=imgs,
    return_tensors="pt",
    do_rescale=False,
)

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(sample['pixel_values'])
last_hidden_states = outputs.last_hidden_state
last_hidden_states = last_hidden_states.cpu().detach().numpy()

In [None]:
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import requests
from src.utils import visualize_tensor_batch
# from train import QuIC360Dataset
from transformers import AutoProcessor, AutoTokenizer

# Load the DINOv2 image processor and backbone.
processor = AutoProcessor.from_pretrained("facebook/dinov2-small")
model = AutoModel.from_pretrained('facebook/dinov2-small',cache_dir='./.cache')
img = Image.open("data/quic360/downtest/images/540231919_58d07745aa_o.jpg").convert("RGB")

# Take the eight 224x224 patches around the vertical seams of the cubemap.
imgs = extract_edge_patches_from_panorama(
    pil_img=img,
    patch_size=224
)
visualize_tensor_batch(imgs)

# extract_edge_patches_from_panorama already returns floats in [0, 1]; the
# processor's default rescale step would divide by 255 a second time, so
# disable it.
sample = processor(
    images=imgs,
    return_tensors="pt",
    do_rescale=False,
)

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(sample['pixel_values'])
last_hidden_states = outputs.last_hidden_state
last_hidden_states = last_hidden_states.cpu().detach().numpy()