# In [None]:
# Highest-scoring result
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Pipeline: load the CSVs, build a numeric feature matrix, train a random
# forest on a stratified 80/20 split, report validation accuracy, then predict
# the test set and write the predictions to 'submission.csv'.

# Load datasets
train_data = pd.read_csv('train_data.csv')
test_data = pd.read_csv('same_season_test_data.csv')
sample_submission = pd.read_csv('same_season_sample_submission.csv')

# Columns to explicitly drop
columns_to_drop = ['id', 'date', 'home_pitcher', 'away_pitcher', 'home_team_abbr', 'away_team_abbr']
target_column = 'home_team_win'

# errors='ignore' already skips names that are absent from a frame, so the
# list does not need to be pre-filtered against each frame's columns.
train_data = train_data.drop(columns=columns_to_drop, errors='ignore')
test_data = test_data.drop(columns=columns_to_drop, errors='ignore')

# Separate the target BEFORE any encoding so it is never one-hot encoded and
# never ends up as a placeholder column in the test features.
y = train_data[target_column]
X = train_data.drop(columns=[target_column])

# Give the test frame exactly the training feature columns: extra columns are
# discarded, columns missing from the test file become NaN (imputed below).
test_data = test_data.reindex(columns=X.columns)

# Columns that pd.get_dummies would encode by default (object/string/category).
categorical_columns = list(X.select_dtypes(include=['object', 'string', 'category']).columns)

# Pin each categorical column to the category set observed in the training
# data. Encoding the two frames independently with drop_first=True lets each
# frame drop a *different* baseline level whenever their observed categories
# differ, which silently mis-encodes test rows. With a shared category set,
# both frames produce the same dummy columns and drop the same baseline.
# Test values that never occur in training become NaN, i.e. all-zero dummies.
for column in categorical_columns:
    train_categorical = X[column].astype('category')
    X[column] = train_categorical
    test_data[column] = pd.Categorical(
        test_data[column], categories=train_categorical.cat.categories
    )

# One-hot encode only the categorical columns; everything else is handled by
# the numeric coercion below.
X = pd.get_dummies(X, columns=categorical_columns, drop_first=True)
test_data = pd.get_dummies(test_data, columns=categorical_columns, drop_first=True)

# The shared categories already make the columns match; this reindex is a
# safety net that also guarantees identical column order for the model.
test_data = test_data.reindex(columns=X.columns, fill_value=0)

# Ensure numeric-only values (unparseable entries become NaN) ...
X = X.apply(pd.to_numeric, errors='coerce')
test_data = test_data.apply(pd.to_numeric, errors='coerce')

# ... then impute BOTH frames with medians computed from the coerced training
# features, so train and test receive the same fill values. A column with no
# usable training value has a NaN median; fall back to 0 for it.
# NOTE: the medians come from all training rows, including the rows later held
# out for validation.
feature_medians = X.median().fillna(0)
X = X.fillna(feature_medians)
test_data = test_data.fillna(feature_medians)

# Train-test split for validation
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Validate model
y_val_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Make predictions on test data (test_data holds feature columns only here)
test_predictions = model.predict(test_data)
test_data[target_column] = test_predictions

# Prepare submission. Assign the prediction array positionally rather than by
# index label; pandas raises ValueError if its length differs from the sample
# submission instead of silently writing NaN rows.
submission = sample_submission.copy()
submission['home_team_win'] = test_predictions
submission.to_csv('submission.csv', index=False)

print("Prediction completed. Results saved in 'submission.csv'")
