zenml-io · htahir1 · Mar 13, 2025 · Mar 11, 2025 · Mar 12, 2025 · Mar 12, 2025
diff --git a/airflow-cloud-composer-etl-feature-train/README.md b/airflow-cloud-composer-etl-feature-train/README.md
diff --git a/...assets/zenml_airflow_vertex_gcp_mlops.png → ...assets/zenml_airflow_vertex_gcp_mlops.png b/...assets/zenml_airflow_vertex_gcp_mlops.png → ...assets/zenml_airflow_vertex_gcp_mlops.png
diff --git a/...-composer-etl-feature-train/.dockerignore → eurorate-predictor/.dockerignore b/...-composer-etl-feature-train/.dockerignore → eurorate-predictor/.dockerignore
diff --git a/...oud-composer-etl-feature-train/.gitignore → eurorate-predictor/.gitignore b/...oud-composer-etl-feature-train/.gitignore → eurorate-predictor/.gitignore
diff --git a/...-cloud-composer-etl-feature-train/LICENSE → eurorate-predictor/LICENSE b/...-cloud-composer-etl-feature-train/LICENSE → eurorate-predictor/LICENSE
diff --git a/...cloud-composer-etl-feature-train/Makefile → eurorate-predictor/Makefile b/...cloud-composer-etl-feature-train/Makefile → eurorate-predictor/Makefile
diff --git a/eurorate-predictor/README.md b/eurorate-predictor/README.md
@@ -0,0 +1,166 @@
+# EuroRate Predictor
+
+Turn European Central Bank data into actionable interest rate forecasts with this comprehensive MLOps solution.
+
+## 🚀 Product Overview
+
+EuroRate Predictor is a production-ready MLOps solution that transforms raw European Central Bank (ECB) interest rate data into accurate forecasts to inform your financial decision-making. Built on ZenML's robust framework, it delivers enterprise-grade machine learning pipelines that can be deployed in both development and production environments.
+
+![EuroRate Predictor Pipeline Architecture](.assets/zenml_airflow_vertex_gcp_mlops.png)
+
+### Key Features
+
+- **End-to-End MLOps Pipeline**: From data extraction to model deployment
+- **Cloud-Ready Architecture**: Seamlessly runs on Google Cloud Platform
+- **Flexible Deployment Options**: Development mode for quick iteration, Production mode for enterprise deployment
+- **Automated Model Evaluation**: Ensures only high-quality models are promoted to production
+- **Scalable Infrastructure**: Leverages Airflow and Vertex AI for enterprise-grade performance
+
+## 💡 How It Works
+
+EuroRate Predictor consists of three integrated pipelines:
+
+1. **Data Processing Pipeline** (Powered by Airflow)
+   - Extracts raw ECB interest rate data from authoritative sources
+   - Performs robust data cleaning and transformation
+   - Produces standardized datasets ready for feature engineering
+
+2. **Feature Engineering Pipeline** (Powered by Airflow)
+   - Enriches datasets with financial domain-specific features
+   - Implements time-series specific transformations
+   - Creates feature-rich datasets optimized for predictive modeling
+
+3. **Predictive Modeling Pipeline** (Hybrid Airflow/Vertex AI)
+   - Trains advanced XGBoost regression models on Google's Vertex AI
+   - Implements rigorous model evaluation protocols
+   - Automatically promotes high-performing models to production
+
+## 🔧 Getting Started
+
+EuroRate Predictor offers two operational modes:
+
+- **Development Mode**: Perfect for data scientists to iterate quickly on local machines
+- **Production Mode**: Enterprise-ready deployment using GCP's Airflow/Vertex AI infrastructure
+
+### Prerequisites
+
+- Python 3.8+
+- Google Cloud Platform account (for production deployment)
+- ZenML installed and configured
+
+### Installation
+
+1. Set up your environment:
+
+```bash
+# Create and activate a Python virtual environment
+python3 -m venv .venv
+source .venv/bin/activate
+
+# Install EuroRate Predictor and dependencies
+pip install -r requirements.txt
+
+# Install required integrations
+zenml integration install gcp airflow
+```
+
+### Configuration
+
+#### Development Mode
+For quick iteration and testing, the default configuration works out-of-the-box with the included sample dataset.
+
+#### Production Mode
+For enterprise deployment, configure your cloud infrastructure:
+
+1. **Set up your GCP Stack** using the ZenML [GCP Stack Terraform module](https://registry.terraform.io/modules/zenml-io/zenml-stack/gcp/latest):
+
+```hcl
+module "zenml_stack" {
+  source  = "zenml-io/zenml-stack/gcp"
+
+  project_id = "your-gcp-project-id"
+  region = "europe-west1"
+  orchestrator = "vertex" # or "skypilot" or "airflow"
+  zenml_server_url = "https://your-zenml-server-url.com"
+  zenml_api_key = "ZENKEY_1234567890..."
+}
+output "zenml_stack_id" {
+  value = module.zenml_stack.zenml_stack_id
+}
+output "zenml_stack_name" {
+  value = module.zenml_stack.zenml_stack_name
+}
+```
+To learn more about the Terraform script, read the
+[ZenML documentation](https://docs.zenml.io/how-to/stack-deployment/deploy-a-cloud-stack-with-terraform)
+or see the
+[Terraform registry](https://registry.terraform.io/modules/zenml-io/zenml-stack).
+
+2. **Configure your data sources and destinations**:
+
+- Update the `data_path` and `table_id` in [`configs/etl_production.yaml`](configs/etl_production.yaml)
+- Set the output `table_id` in [`configs/feature_engineering_production.yaml`](configs/feature_engineering_production.yaml)
+
+### Running EuroRate Predictor
+
+Execute the pipelines in sequence to generate your interest rate forecasts:
+
+```bash
+# Run the ETL pipeline in development mode (the default)
+python run.py --etl
+
+# Run the ETL pipeline in production mode, i.e., with the production configuration (configs/etl_production.yaml)
+python run.py --etl --mode production
+
+# Run the feature engineering pipeline with the latest transformed dataset version
+python run.py --feature --mode production
+
+# Run the model training pipeline with the latest augmented dataset version
+python run.py --training --mode production
+
+# Run the feature engineering pipeline with a specific transformed dataset version (for reproducibility)
+python run.py --feature --transformed_dataset_version "200"
+
+# Run the model training pipeline with a specific augmented dataset version
+python run.py --training --augmented_dataset_version "120"
+```
+
+After execution, access detailed visualizations and metrics in the ZenML dashboard.
+
+## 📊 Results and Visualization
+
+EuroRate Predictor provides comprehensive visualizations of:
+- Data quality metrics
+- Feature importance analysis
+- Model performance evaluations
+- Prediction accuracy over time
+
+Access these insights through the ZenML UI by following the link displayed after pipeline execution.
+
+## 📁 Product Structure
+
+EuroRate Predictor follows a modular architecture:
+
+```
+├── configs                  # Pipeline configuration profiles
+├── data                     # Sample and processed datasets
+├── materializers            # Custom data handlers
+├── pipelines                # Core pipeline definitions
+├── steps                    # Individual pipeline components
+│   ├── extract_data_local.py
+│   ├── extract_data_remote.py
+│   └── transform.py
+├── feature_engineering      # Feature creation components
+├── training                 # Model training components
+└── run.py                   # Command-line interface
+```
+
+## 📚 Documentation
+
+For detailed documentation on using ZenML to build your own MLOps pipelines, please refer to our [ZenML documentation](https://docs.zenml.io/).
+
+## 🔄 Continuous Improvement
+
+EuroRate Predictor is designed for continuous improvement of your interest rate forecasts. As new ECB data becomes available, simply re-run the pipelines to generate updated predictions.
diff --git a/...tl-feature-train/configs/etl_develop.yaml → eurorate-predictor/configs/etl_develop.yaml b/...tl-feature-train/configs/etl_develop.yaml → eurorate-predictor/configs/etl_develop.yaml
diff --git a/...feature-train/configs/etl_production.yaml → ...ate-predictor/configs/etl_production.yaml b/...feature-train/configs/etl_production.yaml → ...ate-predictor/configs/etl_production.yaml
diff --git a/.../configs/feature_engineering_develop.yaml → .../configs/feature_engineering_develop.yaml b/.../configs/feature_engineering_develop.yaml → .../configs/feature_engineering_develop.yaml
diff --git a/...nfigs/feature_engineering_production.yaml → ...nfigs/feature_engineering_production.yaml b/...nfigs/feature_engineering_production.yaml → ...nfigs/feature_engineering_production.yaml
diff --git a/...ature-train/configs/training_develop.yaml → ...e-predictor/configs/training_develop.yaml b/...ature-train/configs/training_develop.yaml → ...e-predictor/configs/training_develop.yaml
diff --git a/...re-train/configs/training_production.yaml → ...redictor/configs/training_production.yaml b/...re-train/configs/training_production.yaml → ...redictor/configs/training_production.yaml
diff --git a/...poser-etl-feature-train/data/raw_data.csv → eurorate-predictor/data/raw_data.csv b/...poser-etl-feature-train/data/raw_data.csv → eurorate-predictor/data/raw_data.csv
diff --git a/...-cloud-composer-etl-feature-train/demo.py → eurorate-predictor/demo.py b/...-cloud-composer-etl-feature-train/demo.py → eurorate-predictor/demo.py
diff --git a/...l-feature-train/materializers/__init__.py → eurorate-predictor/materializers/__init__.py b/...l-feature-train/materializers/__init__.py → eurorate-predictor/materializers/__init__.py
diff --git a/...feature-train/materializers/bq_dataset.py → ...ate-predictor/materializers/bq_dataset.py b/...feature-train/materializers/bq_dataset.py → ...ate-predictor/materializers/bq_dataset.py
diff --git a/.../materializers/bq_dataset_materializer.py → .../materializers/bq_dataset_materializer.py b/.../materializers/bq_dataset_materializer.py → .../materializers/bq_dataset_materializer.py
diff --git a/...eature-train/materializers/csv_dataset.py → ...te-predictor/materializers/csv_dataset.py b/...eature-train/materializers/csv_dataset.py → ...te-predictor/materializers/csv_dataset.py
diff --git a/...materializers/csv_dataset_materializer.py → ...materializers/csv_dataset_materializer.py b/...materializers/csv_dataset_materializer.py → ...materializers/csv_dataset_materializer.py
diff --git a/...tl-feature-train/materializers/dataset.py → eurorate-predictor/materializers/dataset.py b/...tl-feature-train/materializers/dataset.py → eurorate-predictor/materializers/dataset.py
diff --git a/...r-etl-feature-train/pipelines/__init__.py → eurorate-predictor/pipelines/__init__.py b/...r-etl-feature-train/pipelines/__init__.py → eurorate-predictor/pipelines/__init__.py
@@ -15,6 +15,6 @@
 # limitations under the License.
 #
 
-from .etl import etl_pipeline
-from .feature_engineering import feature_engineering_pipeline
-from .training import model_training_pipeline
+from .etl import ecb_predictor_etl_pipeline
+from .feature_engineering import ecb_predictor_feature_engineering_pipeline
+from .training import ecb_predictor_model_training_pipeline
diff --git a/...mposer-etl-feature-train/pipelines/etl.py → eurorate-predictor/pipelines/etl.py b/...mposer-etl-feature-train/pipelines/etl.py → eurorate-predictor/pipelines/etl.py
@@ -26,7 +26,7 @@
 
 
 @pipeline
-def etl_pipeline(mode: str = "develop") -> Dataset:
+def ecb_predictor_etl_pipeline(mode: str = "develop") -> Dataset:
     """Model deployment pipeline.
 
     This is a pipeline that loads data to BigQuery.

diff --git a/...re-train/pipelines/feature_engineering.py → ...redictor/pipelines/feature_engineering.py b/...re-train/pipelines/feature_engineering.py → ...redictor/pipelines/feature_engineering.py
@@ -24,7 +24,7 @@
 
 
 @pipeline
-def feature_engineering_pipeline(
+def ecb_predictor_feature_engineering_pipeline(
     transformed_dataset_id: str, mode: str = "develop"
 ):
     """A pipeline to augment data and load it into BigQuery or locally.

diff --git a/...r-etl-feature-train/pipelines/training.py → eurorate-predictor/pipelines/training.py b/...r-etl-feature-train/pipelines/training.py → eurorate-predictor/pipelines/training.py
@@ -24,7 +24,7 @@
 
 
 @pipeline
-def model_training_pipeline(augmented_dataset_id, mode: str = "develop"):
+def ecb_predictor_model_training_pipeline(augmented_dataset_id, mode: str = "develop"):
     """A pipeline to train an XGBoost model and promote it.
 
     Args:

diff --git a/...mposer-etl-feature-train/requirements.txt → eurorate-predictor/requirements.txt b/...mposer-etl-feature-train/requirements.txt → eurorate-predictor/requirements.txt