In [None]:
import unittest
import pandas as pd
from extract import extract_data
from transform import handle_missing_values, encode_categoricals, normalize_features
from load import load_data

class TestETLPipeline(unittest.TestCase):
    """Checks for the extract, transform and load steps of the ETL pipeline.

    The three paths used by the tests default to the original hard-coded
    location and can be overridden per machine through the environment
    variables ``ETL_SOURCE_PATH``, ``ETL_TARGET_PATH`` and
    ``ETL_TEST_DATA_PATH``.
    """

    # NOTE(review): this default looks like a directory, yet ``test_data_path``
    # is handed to ``pd.read_csv`` below -- point ETL_TEST_DATA_PATH (or this
    # constant) at an actual CSV file before relying on these tests.
    _DEFAULT_PATH = '/Users/mark-danielstamakloe/Desktop/WASHU/SPRING_2024/INTRO_TO_DATA_WRANGLING/my-dagster-project/tutorial_template/notebooks'

    @classmethod
    def setUpClass(cls):
        """Resolve the source, target and test-data paths once per class."""
        # Local import keeps this block self-contained; the environment
        # overrides make the suite runnable outside the author's machine.
        import os

        cls.source_path = os.environ.get('ETL_SOURCE_PATH', cls._DEFAULT_PATH)
        cls.target_path = os.environ.get('ETL_TARGET_PATH', cls._DEFAULT_PATH)
        cls.test_data_path = os.environ.get('ETL_TEST_DATA_PATH', cls._DEFAULT_PATH)

    def test_data_extraction(self):
        """The extract step must return a non-empty DataFrame."""
        data = extract_data(self.source_path)
        self.assertIsNotNone(data)
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(data, pd.DataFrame)
        self.assertFalse(data.empty)

    def test_data_transformation(self):
        """Run the three transform helpers in sequence and check each result."""
        test_data = pd.read_csv(self.test_data_path)

        # After handling missing values no null cell may remain.
        data_no_missing = handle_missing_values(test_data)
        self.assertFalse(data_no_missing.isnull().values.any())

        # A known categorical feature is expected to show up as an encoded column.
        # TODO: replace 'encoded_feature' with the actual encoded column name.
        data_encoded = encode_categoricals(data_no_missing)
        self.assertIn('encoded_feature', data_encoded.columns)

        # TODO: replace 'feature_to_normalize' with the actual feature name.
        data_normalized = normalize_features(data_encoded)
        # Fail with a clear assertion instead of a KeyError if the column is absent.
        self.assertIn('feature_to_normalize', data_normalized.columns)
        self.assertAlmostEqual(data_normalized['feature_to_normalize'].mean(), 0)

    def test_data_loading(self):
        """The load step must report success for the data it is given."""
        # The raw test data stands in for the transformed data here.
        data_ready_to_load = pd.read_csv(self.test_data_path)
        success_flag = load_data(data_ready_to_load, self.target_path)
        self.assertTrue(success_flag)
        # Add additional checks if necessary, e.g., if the file exists at 'target_path'


import unittest
from model import train_model, make_predictions

class TestModel(unittest.TestCase):
    """Smoke tests for ``train_model`` and ``make_predictions``."""

    def setUp(self):
        """Build a tiny fixture shared by the tests."""
        # The original assignments had no right-hand side (a SyntaxError).
        # TODO: replace this placeholder data with samples that match the
        # feature layout the real model expects.
        self.features_train = [[0.0], [1.0], [2.0], [3.0]]
        self.target_train = [0, 0, 1, 1]
        self.features_test = [[0.5], [2.5]]

    def test_train_model(self):
        """Training must return a model object."""
        model = train_model(self.features_train, self.target_train)
        self.assertIsNotNone(model)

    def test_make_predictions(self):
        """Predicting on the test features must return at least one value."""
        # Train here: each test runs on a fresh instance, so no model is
        # carried over from test_train_model (the original used an undefined
        # name ``model``).
        model = train_model(self.features_train, self.target_train)
        predictions = make_predictions(model, self.features_test)
        self.assertGreater(len(predictions), 0)

# Run the test cases defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()



In [None]:
# Shell command (not Python): runs the unittest runner on test_etl_pipeline.py.
python -m unittest test_etl_pipeline.py
