In [1]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.model_selection import train_test_split

# Project root is taken to be the parent of the kernel's working directory
# (assumes the notebook runs from a direct sub-folder of the repo — TODO confirm).
PROJECT_ROOT = Path.cwd().parent

# Expose the project root on the import path so the local `src` package resolves;
# the membership check keeps re-runs of this cell from adding duplicate entries.
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

from src.model import SpaceClassifier

# Input dataset and output directory for serialized models.
DATA_PATH = PROJECT_ROOT.joinpath("data", "processed", "labeled_nasa_data.csv")
MODELS_PATH = PROJECT_ROOT.joinpath("models")
# Create the models directory on first run; a no-op when it already exists.
MODELS_PATH.mkdir(exist_ok=True)

In [2]:
# Load the labelled dataset and discard rows missing either the text or its class.
required_columns = ['explanation', 'label']
df = pd.read_csv(DATA_PATH).dropna(subset=required_columns)

# Features are the 'explanation' column; targets are the 'label' column.
X, y = df['explanation'], df['label']

# Hold out 20% of the rows for evaluation. The split is stratified on the label
# and seeded so that it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print(f"Training set: {len(X_train)} items")
print(f"Test set: {len(X_test)} items")

Training set: 1430 items
Test set: 358 items


In [3]:
# Experiment 1: Naive Bayes variant of the project classifier.
nb_model = SpaceClassifier(model_type='nb')
nb_model.train(X_train, y_train)

# evaluate() is unpacked into an accuracy value and a printable report,
# both computed on the held-out split.
acc_nb, report_nb = nb_model.evaluate(X_test, y_test)

# Single print call; sep="\n" reproduces the line breaks of three separate prints.
print(
    f"Naive Bayes Accuracy: {acc_nb:.4f}",
    "\nClassification Report:\n",
    report_nb,
    sep="\n",
)

Training NB model...
Training complete.
Naive Bayes Accuracy: 0.7709

Classification Report:

                precision    recall  f1-score   support

Asteroid_Comet       1.00      0.19      0.31        27
        Galaxy       0.84      0.86      0.85        79
        Nebula       0.82      0.97      0.89        86
        Planet       0.68      0.99      0.81        96
          Star       0.78      0.36      0.49        70

      accuracy                           0.77       358
     macro avg       0.83      0.67      0.67       358
  weighted avg       0.79      0.77      0.74       358



In [4]:
# Experiment 2: Logistic Regression variant of the project classifier.
lr_model = SpaceClassifier(model_type='logreg')
lr_model.train(X_train, y_train)

# evaluate() is unpacked into an accuracy value and a printable report,
# both computed on the held-out split.
acc_lr, report_lr = lr_model.evaluate(X_test, y_test)

# Single print call; sep="\n" reproduces the line breaks of three separate prints.
print(
    f"Logistic Regression Accuracy: {acc_lr:.4f}",
    "\nClassification Report:\n",
    report_lr,
    sep="\n",
)

Training LOGREG model...
Training complete.
Logistic Regression Accuracy: 0.8547

Classification Report:

                precision    recall  f1-score   support

Asteroid_Comet       0.60      0.78      0.68        27
        Galaxy       0.88      0.89      0.88        79
        Nebula       0.93      0.87      0.90        86
        Planet       0.91      0.93      0.92        96
          Star       0.80      0.73      0.76        70

      accuracy                           0.85       358
     macro avg       0.82      0.84      0.83       358
  weighted avg       0.86      0.85      0.86       358



In [5]:
# Recap of both experiments' accuracies, one per line.
print(
    f"Naive Bayes: {acc_nb:.4f}",
    f"Logistic Regression: {acc_lr:.4f}",
    sep="\n",
)

# Keep whichever classifier scored strictly higher; a tie falls back to Naive Bayes.
# NOTE(review): the choice is made on the same held-out split that produced the
# reported accuracies, so the winner's score is an optimistic estimate — consider
# selecting on a separate validation split.
best_model = lr_model if acc_lr > acc_nb else nb_model

# Persist the selected model under the models directory.
save_path = MODELS_PATH.joinpath("space_model_v1.pkl")
best_model.save(save_path)

print(f"✅ Model saved to: {save_path}")

Naive Bayes: 0.7709
Logistic Regression: 0.8547
Model saved to c:\Users\samol\Desktop\space_objects_recognition\models\space_model_v1.pkl
✅ Model saved to: c:\Users\samol\Desktop\space_objects_recognition\models\space_model_v1.pkl
