turintech
diff --git a/‎report.py‎
Lines changed: 291 additions & 0 deletions b/‎report.py‎
Lines changed: 291 additions & 0 deletions
@@ -0,0 +1,291 @@
+"""Report generation module for creating HTML reports with embedded visualizations.
+
+This module provides functionality to generate self-contained HTML reports from
+trained models, including performance metrics, coefficients, and matplotlib
+visualizations. Reports use Jinja2 templates and embed images as base64-encoded
+strings for complete portability.
+"""
+
+import base64
+from io import BytesIO
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, List, Optional, Union, Any
+from matplotlib.figure import Figure
+from jinja2 import Environment, FileSystemLoader, Template
+
+
+def figure_to_base64(fig: Figure) -> str:
+    """
+    Convert a matplotlib Figure to a base64-encoded PNG string.
+    
+    This function takes a matplotlib Figure object, saves it to an in-memory
+    BytesIO buffer as PNG format, and encodes the resulting bytes as a base64
+    string. This allows the image to be embedded directly in HTML using a
+    data URI, creating self-contained reports with no external file dependencies.
+    
+    Parameters
+    ----------
+    fig : Figure
+        A matplotlib Figure object to convert. Should be a complete figure
+        ready for display (with all desired formatting, labels, etc.).
+    
+    Returns
+    -------
+    str
+        Base64-encoded string representation of the PNG image. This string
+        can be used directly in HTML <img> tags with a data URI:
+        `<img src="data:image/png;base64,{base64_string}">`
+    
+    Raises
+    ------
+    TypeError
+        If fig is not a matplotlib Figure object.
+    RuntimeError
+        If there's an error during PNG conversion or base64 encoding.
+    
+    Examples
+    --------
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots()
+    >>> ax.plot([1, 2, 3], [1, 4, 9])
+    >>> base64_str = figure_to_base64(fig)
+    >>> html = f'<img src="data:image/png;base64,{base64_str}">'
+    >>> plt.close(fig)
+    
+    Notes
+    -----
+    - The figure is saved at 100 DPI by default for reasonable file size
+    - The figure is not modified or closed by this function
+    - Remember to close figures after use to prevent memory leaks
+    - Typical base64 strings are 50-500KB depending on figure complexity
+    - The bbox_inches='tight' parameter minimizes whitespace around the plot
+    """
+    # Validate input
+    if not isinstance(fig, Figure):
+        raise TypeError(
+            f"Expected matplotlib Figure object, got {type(fig).__name__} instead."
+        )
+    
+    try:
+        # Create in-memory bytes buffer
+        buffer = BytesIO()
+        
+        # Save figure to buffer as PNG
+        # bbox_inches='tight' removes excess whitespace
+        # dpi=100 provides good quality while keeping file size reasonable
+        fig.savefig(buffer, format='png', bbox_inches='tight', dpi=100)
+        
+        # Get the bytes from buffer
+        buffer.seek(0)
+        image_bytes = buffer.read()
+        
+        # Encode as base64
+        base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
+        
+        # Close the buffer
+        buffer.close()
+        
+        return base64_encoded
+        
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to convert figure to base64: {str(e)}"
+        ) from e
+
+
+def generate_report(
+    metrics: Optional[Dict[str, float]],
+    coefficients: Optional[List[Dict[str, Union[str, float]]]],
+    figures: Optional[Dict[str, Figure]],
+    metadata: Optional[Dict[str, Any]],
+    output_path: Union[str, Path]
+) -> None:
+    """
+    Generate a self-contained HTML report with model metrics and visualizations.
+    
+    This function creates a comprehensive HTML report using a Jinja2 template,
+    embedding all visualizations as base64-encoded images. The resulting HTML
+    file is completely self-contained and can be shared, emailed, or opened in
+    any modern web browser without external dependencies.
+    
+    Parameters
+    ----------
+    metrics : Optional[Dict[str, float]]
+        Dictionary of performance metrics with keys:
+        - 'r2': R² score (coefficient of determination)
+        - 'mse': Mean Squared Error
+        - 'rmse': Root Mean Squared Error
+        - 'mae': Mean Absolute Error
+        Can be None if metrics are not available.
+    
+    coefficients : Optional[List[Dict[str, Union[str, float]]]]
+        List of dictionaries containing feature coefficients, where each dict has:
+        - 'feature': Feature name (str)
+        - 'value': Coefficient value (float)
+        Example: [{'feature': 'age', 'value': 0.5}, {'feature': 'income', 'value': 1.2}]
+        Can be None if coefficients are not available.
+    
+    figures : Optional[Dict[str, Figure]]
+        Dictionary of matplotlib Figure objects with keys:
+        - 'predictions': Actual vs predicted values plot
+        - 'residuals': Residuals plot
+        - 'coefficients': Feature coefficients bar chart
+        Figures will be converted to base64 and embedded in HTML.
+        Can be None if visualizations are not available.
+    
+    metadata : Optional[Dict[str, Any]]
+        Dictionary containing model metadata with optional keys:
+        - 'model_type': Type of model (default: 'Linear Regression')
+        - 'training_date': Date model was trained
+        - 'feature_count': Number of features used
+        - 'imputation_method': Method used for handling missing values
+        - 'scaling_method': Method used for feature scaling
+        Can be None, in which case defaults will be used.
+    
+    output_path : Union[str, Path]
+        Path where the HTML report should be saved. Parent directories
+        will be created if they don't exist. Example: 'reports/model_report.html'
+    
+    Raises
+    ------
+    FileNotFoundError
+        If the Jinja2 template file cannot be found.
+    PermissionError
+        If the output path is not writable.
+    RuntimeError
+        If there's an error during template rendering or file writing.
+    
+    Examples
+    --------
+    >>> from visualizations import create_predictions_plot
+    >>> import numpy as np
+    >>> 
+    >>> # Prepare data
+    >>> y_actual = np.array([1, 2, 3, 4, 5])
+    >>> y_pred = np.array([1.1, 2.2, 2.9, 4.1, 4.8])
+    >>> 
+    >>> # Create visualizations
+    >>> fig = create_predictions_plot(y_actual, y_pred)
+    >>> figures = {'predictions': fig}
+    >>> 
+    >>> # Prepare metrics
+    >>> metrics = {'r2': 0.95, 'mse': 0.1, 'rmse': 0.316, 'mae': 0.2}
+    >>> 
+    >>> # Prepare coefficients
+    >>> coefficients = [
+    ...     {'feature': 'age', 'value': 0.5},
+    ...     {'feature': 'income', 'value': 1.2}
+    ... ]
+    >>> 
+    >>> # Prepare metadata
+    >>> metadata = {
+    ...     'model_type': 'Linear Regression',
+    ...     'training_date': '2024-01-15',
+    ...     'feature_count': 2,
+    ...     'imputation_method': 'Mean',
+    ...     'scaling_method': 'Standard Scaler'
+    ... }
+    >>> 
+    >>> # Generate report
+    >>> generate_report(metrics, coefficients, figures, metadata, 'report.html')
+    >>> 
+    >>> # Clean up
+    >>> import matplotlib.pyplot as plt
+    >>> plt.close(fig)
+    
+    Notes
+    -----
+    - All visualizations are embedded as base64-encoded PNG images
+    - The report is completely self-contained (no external file dependencies)
+    - Missing data (None values) is handled gracefully with 'N/A' placeholders
+    - The template includes responsive CSS for mobile and desktop viewing
+    - Typical report file size is <2MB for 3 embedded plots
+    - Parent directories are created automatically if they don't exist
+    - The function closes no figures - caller is responsible for cleanup
+    """
+    # Convert output_path to Path object for easier handling
+    output_path = Path(output_path)
+    
+    # Create parent directories if they don't exist
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    
+    try:
+        # Set up Jinja2 environment
+        # The template should be in the 'templates' directory relative to this file
+        template_dir = Path(__file__).parent / 'templates'
+        
+        if not template_dir.exists():
+            raise FileNotFoundError(
+                f"Templates directory not found at {template_dir}. "
+                "Please ensure the 'templates' directory exists with report_template.html."
+            )
+        
+        env = Environment(loader=FileSystemLoader(str(template_dir)))
+        
+        # Load the template
+        try:
+            template = env.get_template('report_template.html')
+        except Exception as e:
+            raise FileNotFoundError(
+                f"Could not load template 'report_template.html' from {template_dir}: {str(e)}"
+            ) from e
+        
+        # Convert figures to base64 if provided
+        encoded_figures = {}
+        if figures:
+            for key, fig in figures.items():
+                if fig is not None:
+                    try:
+                        encoded_figures[key] = figure_to_base64(fig)
+                    except Exception as e:
+                        # Log warning but continue - allow partial reports
+                        print(f"Warning: Failed to encode figure '{key}': {str(e)}")
+        
+        # Prepare metadata with defaults
+        if metadata is None:
+            metadata = {}
+        
+        # Ensure we have default values for missing metadata
+        metadata_with_defaults = {
+            'model_type': metadata.get('model_type', 'Linear Regression'),
+            'training_date': metadata.get('training_date', 'N/A'),
+            'feature_count': metadata.get('feature_count', 'N/A'),
+            'imputation_method': metadata.get('imputation_method', 'Mean (numeric)'),
+            'scaling_method': metadata.get('scaling_method', 'Standard Scaler'),
+        }
+        
+        # Generate timestamp for report
+        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        
+        # Render the template
+        html_content = template.render(
+            timestamp=timestamp,
+            metrics=metrics,
+            coefficients=coefficients,
+            figures=encoded_figures if encoded_figures else None,
+            metadata=metadata_with_defaults
+        )
+        
+        # Write to output file
+        try:
+            with open(output_path, 'w', encoding='utf-8') as f:
+                f.write(html_content)
+        except PermissionError as e:
+            raise PermissionError(
+                f"Permission denied: Cannot write to {output_path}. "
+                "Please check file permissions."
+            ) from e
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to write report to {output_path}: {str(e)}"
+            ) from e
+            
+    except (FileNotFoundError, PermissionError, RuntimeError):
+        # Re-raise known exceptions
+        raise
+    except Exception as e:
+        # Catch any other unexpected errors
+        raise RuntimeError(
+            f"Unexpected error while generating report: {str(e)}"
+        ) from e