In [1]:
# Switch the kernel's working directory to the parent folder; later cells run from there.
# NOTE(review): presumably needed so the local `utils` package is importable — confirm.
%cd ..

/kaggle/working


In [3]:
from hydra import compose, initialize
from omegaconf import OmegaConf

# Compose the Hydra config for this notebook with the debug override switched on.
with initialize(
    version_base=None,
    config_path="../cand_unsupervised/feat_image_num",
):
    cfg = compose(config_name="config.yaml", overrides=["debug=True"])

# Show the composed config as YAML so the effective settings are visible in the output.
print(OmegaConf.to_yaml(cfg))

debug: true
seed: 7
dir:
  data_dir: /kaggle/working/input/atmaCup16_Dataset
  output_dir: /kaggle/working/output
  exp_dir: /kaggle/working/output/exp
  cand_unsupervised_dir: /kaggle/working/output/cand_unsupervised
  cand_supervised_dir: /kaggle/working/output/cand_supervised
  datasets_dir: /kaggle/working/output/datasets
exp:
  num_candidate: 100
  k:
  - 1
  - 5
  - 10
  - 50
  - 100



In [4]:
import os
import sys
from pathlib import Path

import hydra
import numpy as np
import polars as pl
import torch
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder
from tqdm.auto import tqdm

import utils
import wandb
from utils.load import (
    load_image_embeddings,
    load_label_data,
    load_log_data,
    load_session_data,
)
from utils.metrics import calculate_metrics

In [8]:
# Load the image data from the configured data directory, requesting only the
# `yad_no` and `category` columns.
# NOTE(review): presumably one row per image — verify against utils.load.
image_df = load_image_embeddings(
    Path(cfg.dir.data_dir),
    columns=["yad_no", "category"],
)

In [10]:
# Count rows per (yad_no, category), then reshape to one row per yad_no with one
# count column per category. A category that has no rows for a given yad_no
# shows up as null after the pivot.
image_count_df = (
    image_df.group_by(["yad_no", "category"])
    .agg(pl.col("category").count().alias("counts"))
    .pivot(values="counts", index="yad_no", columns="category")
    .with_columns(
        # Sum every pivoted category column rather than a hard-coded name list:
        # the cell no longer fails when a category is absent from the data
        # (e.g. on a subset), and a newly added category is not silently left
        # out of the total. Null counts are skipped by the horizontal sum.
        pl.sum_horizontal(pl.exclude("yad_no")).alias("sum_image_num")
    )
)

In [11]:
# Display the pivoted per-yad_no count table (category counts plus `sum_image_num`).
image_count_df

yad_no,exterior,food,room,others,facility,sum_image_num
i64,u32,u32,u32,u32,u32,u32
27,2,1,3,3,,9
103,3,3,3,3,3,15
122,3,3,3,3,3,15
126,3,3,2,3,3,14
140,3,3,3,3,3,15
196,3,3,3,3,3,15
228,3,3,3,1,3,13
229,3,,3,3,3,12
244,2,2,3,3,3,13
254,3,3,3,3,3,15
