In [1]:
%cd ..

/kaggle/working


In [2]:
from hydra import compose, initialize
from omegaconf import OmegaConf

with initialize(
    version_base=None, config_path="../cand_unsupervised/ranking_location"
):
    cfg = compose(config_name="config.yaml")
    print(OmegaConf.to_yaml(cfg))

debug: false
seed: 7
dir:
  data_dir: /kaggle/working/input/atmaCup16_Dataset
  output_dir: /kaggle/working/output
  exp_dir: /kaggle/working/output/exp
  cand_unsupervised_dir: /kaggle/working/output/cand_unsupervised
  cand_supervised_dir: /kaggle/working/output/cand_supervised
exp:
  num_candidate: 100
  k:
  - 1
  - 5
  - 10
  - 50
  - 100



In [3]:
from pathlib import Path
import polars as pl

In [4]:
import utils
from utils.load import load_label_data, load_log_data, load_yad_data
from utils.metrics import calculate_metrics

In [11]:
with utils.timer("load data"):
    train_log_df = load_log_data(Path(cfg.dir.data_dir), "train")
    test_log_df = load_log_data(Path(cfg.dir.data_dir), "test")
    all_log_df = pl.concat([train_log_df, test_log_df])
    yad_df = load_yad_data(Path(cfg.dir.data_dir))

[load data] done in 0.1 s


In [21]:
count_df = (
    all_log_df.get_column("yad_no").value_counts().sort(by="counts", descending=True)
)
count_df.head()

row_nr,yad_no,counts
u32,i64,u32
0,12350,1606
1,719,1520
2,3338,1492
3,13468,1373
4,10095,1313


In [28]:
# wid_cd	ken_cd	lrg_cd	sml_cd

location_col = "sml_cd"

yad_counts_df = yad_df.join(count_df, on="yad_no").with_columns(
    pl.col("counts").rank(descending=True).over(location_col).alias("rank")
)
location_candidates_df = (
    yad_counts_df.sort(by="counts", descending=True)
    .group_by(location_col)
    .agg([pl.col("yad_no"), pl.col("rank"), pl.col("yad_no").count().alias("count")])
)
location_candidates_df.head()

sml_cd,yad_no,rank,count
str,list[i64],list[f64],u32
"""30e4ee82595c5d…","[3755, 8196, … 5501]","[1.0, 2.0, … 21.0]",21
"""bc6e513c8af97d…","[10423, 5971, … 6866]","[1.0, 2.0, … 7.0]",7
"""1295c124f6d67c…","[6403, 8496, … 12258]","[1.0, 2.0, … 23.5]",24
"""2170b4597e80dd…","[8349, 6882, … 6085]","[1.0, 2.0, … 24.5]",25
"""427031a26de29b…","[5403, 11473, … 9950]","[1.0, 2.0, … 8.0]",8


In [26]:
yad_counts_df.head()

yad_no,yad_type,total_room_cnt,wireless_lan_flg,onsen_flg,kd_stn_5min,kd_bch_5min,kd_slp_5min,kd_conv_walk_5min,wid_cd,ken_cd,lrg_cd,sml_cd,row_nr,counts,rank
i64,i64,f64,f64,i64,f64,f64,f64,f64,str,str,str,str,u32,u32,f64
1,0,129.0,1.0,0,1.0,,,1.0,"""f0112abf369fb0…","""072c85e1653e10…","""449c52ef581d5f…","""677a32689cd1ad…",6310,30,44.0
2,0,23.0,1.0,0,,,,,"""d86102dd9c232b…","""b4d2fb4e51ea7b…","""5c9a8f48e9df02…","""4ee16ee838dd27…",6405,29,29.0
3,0,167.0,1.0,1,1.0,,,1.0,"""46e33861f921c3…","""572d60f0f5212a…","""8a623b960557e8…","""ab9480fd72a44d…",369,210,1.0
4,0,144.0,1.0,0,1.0,,,1.0,"""46e33861f921c3…","""107c7305a74c8d…","""52c9ea83f2cfe9…","""1cc3e1838bb0fd…",3268,67,7.0
5,0,41.0,1.0,1,,,,,"""43875109d1dab9…","""75617bb07a2785…","""9ea5a911019b66…","""be1b876af18afc…",6277,30,6.0
