# Model Training

This notebook trains a machine learning model on the preprocessed data, evaluates it on a held-out test set, and saves the trained model to disk.

In [None]:
# Import necessary libraries
from pathlib import Path

import joblib
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
)
from sklearn.model_selection import train_test_split

from src.model import create_model
from src.train import train_model

# Input/output locations and the label column, named once so they are easy to
# change and cannot drift apart between uses. Paths are relative to the
# notebook's working directory.
DATA_PATH = Path('../data/processed/processed_data.csv')
MODEL_PATH = Path('../data/models/trained_model.pkl')
TARGET_COLUMN = 'target_column'

# Load the processed data
df = pd.read_csv(DATA_PATH)

# Define features and target variable: every column except the target is
# treated as a feature.
X = df.drop(columns=TARGET_COLUMN)
y = df[TARGET_COLUMN]

# Split data into training and test sets: 20% is held out for evaluation, and
# the fixed random_state keeps the split reproducible between runs.
# NOTE(review): the split is not stratified; if the target classes are
# imbalanced, consider passing stratify=y -- confirm against the dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the model (project helper from src.model)
model = create_model()

# Train the model (project helper from src.train); the returned object is the
# one used for prediction and persistence below.
trained_model = train_model(model, X_train, y_train)

# Evaluate the model on the held-out test set
y_pred = trained_model.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

# Calculate and print other metrics. Precision, recall and F1 use
# average='weighted', i.e. per-class scores weighted by class support.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# Save the trained model. Create the output directory first: without this,
# a missing ../data/models directory makes joblib.dump raise
# FileNotFoundError after training has already completed, losing the model.
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(trained_model, MODEL_PATH)
