In [4]:
import pandas as pd
import numpy as np
import glob
import os
import re

# ❶ Locate the scored submission files
base_dir = r"C:\Users\81807\Desktop\Kaggle\GCI②(NFL Draft Prediction)\submissions"


def has_score_tag(path: str) -> bool:
    """Return True when the file *name* of ``path`` contains "(" followed by a digit.

    Only the basename is inspected. The directory part is deliberately
    ignored: a parent folder such as "run(2)" would otherwise make every file
    below it look like a scored submission.
    """
    return re.search(r"\(\d", os.path.basename(path)) is not None


# glob returns paths in arbitrary, OS-dependent order; sorting makes the
# processing order reproducible from run to run.
file_paths = sorted(glob.glob(os.path.join(base_dir, "submission_*.csv")))
selected_files = [f for f in file_paths if has_score_tag(f)]

# ❷ Collect the sorted "Drafted" vector and the score of every selected file
vectors = {}  # file path -> ascending-sorted "Drafted" values
scores = {}   # file path -> score parsed from the file name

for file in selected_files:
    basename = os.path.basename(file)

    # The score is the decimal number in parentheses in the file name,
    # e.g. "submission_10_0628(0.83668).csv" -> 0.83668.
    score_match = re.search(r"\((\d+\.\d+)\)", basename)
    if score_match is None:
        # A file without a parseable score cannot contribute an estimate;
        # keeping it would allow None to be reported as the estimated score.
        print(f"⚠️ スコアを抽出できないためスキップ: {basename}")
        continue
    score = float(score_match.group(1))
    scores[file] = score

    # Sorted "Drafted" vector
    df = pd.read_csv(file)
    drafted_sorted = np.sort(df["Drafted"].values)
    vectors[file] = drafted_sorted

print(f"✅ 収集済ファイル数: {len(vectors)}")

# ❸ Load the new submission whose score is to be estimated
new_file = r"C:\Users\81807\Desktop\Kaggle\GCI②(NFL Draft Prediction)\submissions\submission_20_0705().csv"
df_new = pd.read_csv(new_file)

# Take a private copy of the "Drafted" values and sort that copy in place
# (ascending), leaving the DataFrame column itself untouched.
new_vec = np.array(df_new["Drafted"].values)
new_vec.sort()

# ❹ Find the scored submission whose sorted vector is closest in L1 distance
# NOTE(review): each vector is sorted on its own before being compared, so this
# measures how similar the *distributions* of predicted values are, not how
# well the predictions agree row by row — confirm that this is intended.
distances = {}
for file, vec in vectors.items():
    # Element-wise subtraction is only meaningful for equally shaped vectors.
    # Without this guard a length-1 vector would broadcast silently and any
    # other mismatch would abort the whole search with a broadcasting error.
    if vec.shape != new_vec.shape:
        print(f"⚠️ ベクトル長が異なるためスキップ: {os.path.basename(file)} {vec.shape} != {new_vec.shape}")
        continue
    dist = np.linalg.norm(vec - new_vec, ord=1)
    distances[file] = dist

if not distances:
    # min() on an empty dict raises a generic ValueError; fail with a message
    # that says what is actually missing instead.
    raise ValueError("比較可能な提出ファイルがありません")

# On ties, min() returns the first key in dict insertion order.
closest_file = min(distances, key=distances.get)

print(f"✅ 最も近いファイル: {closest_file}")
print(f"✅ 推定スコア: {scores[closest_file]}")

# ❺ Show the five nearest scored submissions as well
# Rank the paths by distance (the sort is stable, so ties keep dict order),
# then pair each of the five best paths with its distance.
ranked_paths = sorted(distances, key=distances.get)
closest_5 = [(path, distances[path]) for path in ranked_paths[:5]]

print("\n✅ 類似度上位5件:")
for file, dist in closest_5:
    print(f"{os.path.basename(file)} | Score: {scores[file]} | L1距離: {dist:.5f}")


✅ 収集済ファイル数: 15
✅ 最も近いファイル: C:\Users\81807\Desktop\Kaggle\GCI②(NFL Draft Prediction)\submissions\submission_10_0628(0.83668).csv
✅ 推定スコア: 0.83668

✅ 類似度上位5件:
submission_10_0628(0.83668).csv | Score: 0.83668 | L1距離: 9.14394
submission_08_0627(0.83414).csv | Score: 0.83414 | L1距離: 10.89482
submission_18_0705(0.84272).csv | Score: 0.84272 | L1距離: 12.30194
submission_06_0627(0.82752).csv | Score: 0.82752 | L1距離: 12.34326
submission_17_0704(0.84169).csv | Score: 0.84169 | L1距離: 12.95179


In [5]:
# L1 distance between the new submission and submission 09 specifically.
# Relies on `new_vec` having been defined by an earlier cell.
df_09 = pd.read_csv(r"C:\Users\81807\Desktop\Kaggle\GCI②(NFL Draft Prediction)\submissions\submission_09_0627(0.84205).csv")
vec_09 = np.sort(df_09["Drafted"].values)

# Guard the subtraction: a length-1 vector would broadcast silently, and any
# other mismatch would surface as an opaque broadcasting error.
if vec_09.shape != new_vec.shape:
    raise ValueError(f"ベクトル長が一致しません: {vec_09.shape} != {new_vec.shape}")

dist_09 = np.linalg.norm(vec_09 - new_vec, ord=1)
print(f"✅ 09モデルとのL1距離: {dist_09:.5f}")


✅ 09モデルとのL1距離: 16.76278
