In [5]:
%cd ..
# This line above changes the directory to D:\Projects\sales-ml

# %% Imports
import mlflow

# Import our custom source-code modules
# (This works because you marked 'src' as a Sources Root in PyCharm,
# and JupyterLab is running from the project root)
from src.data_loader import load_config, load_raw
from src.model_train import train_model

print("Modules imported successfully.")

D:\Projects\sales-ml
Modules imported successfully.


In [13]:
# %% Load Config and Data
# Fetch the configuration first, then load the raw data set by file name.
config = load_config()

raw_file = "sample_sales.csv"
df = load_raw(raw_file)

In [7]:
# Sanity check: show the frame's dimensions, then echo the loaded config.
print("Data loaded. Shape: {}".format(df.shape))
print("Config loaded:", config, sep="\n")

Data loaded. Shape: (2000, 16)
Config loaded:
{'data': {'raw': 'data/raw', 'processed': 'data/processed'}, 'training': {'random_seed': 42, 'test_size': 0.2, 'smote': True, 'model': {'type': 'xgboost', 'params': {'n_estimators': 200, 'learning_rate': 0.1, 'max_depth': 6}}}}


In [8]:
# %% Define Features and Target
# y is the "target" column; X is the frame with that column removed.
y = df["target"]
X = df.drop(columns="target")

In [9]:
# Print the dimensions of the feature frame and of the target, one per line.
print(
    f"Features (X) shape: {X.shape}",
    f"Target (y) shape: {y.shape}",
    sep="\n",
)

Features (X) shape: (2000, 15)
Target (y) shape: (2000,)


In [14]:
# %% --- Run MLflow Experiment ---
# Main experiment cell: a single MLflow run that records the training config,
# the metrics returned by train_model, and the fitted model.


def _flatten_params(params, prefix=""):
    """Flatten a nested dict into one level, joining nested keys with '.'.

    Args:
        params: Possibly nested dict mapping parameter names to values.
        prefix: Key prefix used while recursing; callers leave it empty.

    Returns:
        A new flat dict, e.g. {"model": {"type": "xgboost"}} becomes
        {"model.type": "xgboost"}. Empty nested dicts are kept as values.
    """
    flat = {}
    for key, value in params.items():
        name = f"{prefix}{key}"
        if isinstance(value, dict) and value:
            flat.update(_flatten_params(value, prefix=f"{name}."))
        else:
            flat[name] = value
    return flat


# 1. Set the experiment name
mlflow.set_experiment("Sales Prediction (SMOTE)")

# 2. Start an MLflow run
# NOTE(review): the run name says NO_SMOTE while the experiment name says
# SMOTE - confirm the naming matches the config actually used for this run.
with mlflow.start_run(run_name="Third_XGB_NO_SMOTE_FIX"):

    print("Starting MLflow run...")

    # 3. Log parameters from the 'training' section of our config.yaml.
    # That section holds a nested 'model' dict; passed as-is it would be
    # recorded as one stringified value, so flatten it into individual
    # params (model.type, model.params.max_depth, ...) that can be searched
    # and compared across runs.
    mlflow.log_params(_flatten_params(config['training']))

    # 4. Train the model using our src module
    # train_model returns the model AND the metrics dict
    model, metrics = train_model(X, y, config)

    # 5. Log the metrics
    mlflow.log_metrics(metrics)

    # 6. Log the model itself
    # NOTE(review): uses the sklearn flavor - assumes train_model returns a
    # scikit-learn-compatible estimator; confirm.
    mlflow.sklearn.log_model(model, "model")

    # 7. Log a tag to make it easy to find
    mlflow.set_tag("model_type", "XGBoost")

    print("\n--- MLflow Run Complete ---")
    print("Logged Parameters, Metrics, and Model.")
    print("Run `mlflow ui` in your terminal to see results.")

Starting MLflow run...
Training XGBoost model...




Evaluating model on test set...
              precision    recall  f1-score   support

           0       0.96      0.99      0.97       378
           1       0.58      0.32      0.41        22

    accuracy                           0.95       400
   macro avg       0.77      0.65      0.69       400
weighted avg       0.94      0.95      0.94       400






--- MLflow Run Complete ---
Logged Parameters, Metrics, and Model.
Run `mlflow ui` in your terminal to see results.
