feat: supporting Facebook competition (don't merge now) #364

Open
wants to merge 15 commits into main
Further Revision
xisen-w committed Sep 27, 2024
commit 52947e062fa17775031cd59b652026137e0cac2b
@@ -5,7 +5,7 @@
 import numpy as np
 import pandas as pd
 from fea_share_preprocess import preprocess_script
-from sklearn.metrics import average_precision_score
+from sklearn.metrics import average_precision_score, log_loss

 # Set random seed for reproducibility
 SEED = 42
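
Reviewer note: the only change in this hunk is importing log_loss alongside average_precision_score. As a reminder (not part of the PR), sklearn.metrics.log_loss takes the true labels and a matrix of predicted class probabilities, one column per class with rows summing to 1, and lower values are better:

import numpy as np
from sklearn.metrics import log_loss

y_true = [0, 2, 1, 2]
y_pred_proba = np.array([
    [0.8, 0.1, 0.1],  # confident and correct
    [0.2, 0.2, 0.6],  # correct
    [0.3, 0.5, 0.2],  # correct, less confident
    [0.6, 0.3, 0.1],  # wrong
])
print(log_loss(y_true, y_pred_proba))  # ~0.93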
@@ -57,40 +57,42 @@ def flatten_columns(df: pd.DataFrame) -> pd.DataFrame:
 X_valid = flatten_columns(X_valid)
 X_test = flatten_columns(X_test)

-model_l = []  # list[tuple[model, predict_func]]
+model_l = []  # list[tuple[model, predict_func, validation_score]]
 for f in DIRNAME.glob("model/model*.py"):
     m = import_module_from_path(f.stem, f)
     # Check if the fit function accepts n_classes
     if 'n_classes' in m.fit.__code__.co_varnames:
         model = m.fit(X_train, y_train, X_valid, y_valid, n_classes)
     else:
         model = m.fit(X_train, y_train, X_valid, y_valid)
-    model_l.append((model, m.predict))
+
+    # Evaluate the model on the validation set
+    y_valid_pred = m.predict(model, X_valid)
+    validation_score = log_loss(y_valid, y_valid_pred)
+
+    model_l.append((model, m.predict, validation_score))

-# 4) Evaluate the model on the validation set
-y_valid_pred_l = []
-for model, predict_func in model_l:
-    y_valid_pred_l.append(predict_func(model, X_valid))
+# Sort models by validation score (lower is better for log loss)
+model_l.sort(key=lambda x: x[2])

-# 5) Ensemble
-y_valid_pred_proba = np.mean(y_valid_pred_l, axis=0)
+# 4) Use the best model for predictions
+best_model, best_predict_func, _ = model_l[0]
+
+# 5) Make predictions on the validation set using the best model
+y_valid_pred = best_predict_func(best_model, X_valid)

 # Compute metrics
-map3 = compute_map3(y_valid, y_valid_pred_proba)
+map3 = compute_map3(y_valid, y_valid_pred)
 print(f"MAP@3 on validation set: {map3}")

 # 6) Save the validation metrics
 pd.Series(data=[map3], index=["MAP@3"]).to_csv("submission_score.csv")

-# 7) Make predictions on the test set and save them
-y_test_pred_l = []
-for model, predict_func in model_l:
-    y_test_pred_l.append(predict_func(model, X_test))
-
-y_test_pred_proba = np.mean(y_test_pred_l, axis=0)
+# 7) Make predictions on the test set using the best model
+y_test_pred = best_predict_func(best_model, X_test)

 # Get top 3 predictions for each test sample
-top_3_indices = np.argsort(-y_test_pred_proba, axis=1)[:, :3]
+top_3_indices = np.argsort(-y_test_pred, axis=1)[:, :3]
 top_3_place_ids = place_id_encoder.inverse_transform(top_3_indices)

 # Create submission DataFrame
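
Reviewer note: to make the revised flow easier to reason about, here is a minimal, self-contained sketch of the same selection logic under stand-in assumptions. The candidate models, the compute_map3 helper, and the bound predict_proba functions below are illustrative substitutes; the PR's own models live in model/model*.py and use the (model, X) predict signature shown in the diff.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split


def compute_map3(y_true, y_pred_proba):
    # MAP@3: 1/rank of the true class within the top-3 predictions, else 0
    top3 = np.argsort(-y_pred_proba, axis=1)[:, :3]
    scores = []
    for truth, preds in zip(y_true, top3):
        hits = np.where(preds == truth)[0]
        scores.append(1.0 / (hits[0] + 1) if hits.size else 0.0)
    return float(np.mean(scores))


X, y = make_classification(n_samples=600, n_features=20, n_informative=6, n_classes=4, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.25, random_state=42)

# Fit each candidate, score it on the validation set with log loss, keep the tuple
model_l = []  # list[tuple[model, predict_func, validation_score]]
for model in (LogisticRegression(max_iter=1000), RandomForestClassifier(random_state=42)):
    model.fit(X_train, y_train)
    y_valid_proba = model.predict_proba(X_valid)
    model_l.append((model, model.predict_proba, log_loss(y_valid, y_valid_proba, labels=model.classes_)))

# Lower log loss is better, so the best model sits at index 0 after sorting
model_l.sort(key=lambda x: x[2])
best_model, best_predict_func, best_score = model_l[0]

print(f"best validation log loss: {best_score:.4f}")
print(f"MAP@3 on validation set: {compute_map3(y_valid, best_predict_func(X_valid)):.4f}")

One design observation: log loss is computed from the full probability matrix, while MAP@3 only looks at the ranking of the top three classes, so the model with the best log loss is not guaranteed to have the best MAP@3. Scoring candidates directly with compute_map3 on the validation set would rank them by the competition metric itself.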