# Pydantic Data Validation

Learn how Pydantic validates data and enforces data types in your FastAPI applications.

## 1. Basic Field Validation

Pydantic automatically validates field types and constraints.

In [None]:
from pydantic import BaseModel, Field, EmailStr, ValidationError
from typing import Optional

# Define a model with field constraints
class User(BaseModel):
    """User record used to demonstrate Pydantic's built-in field constraints.

    ``username``, ``email`` and ``age`` are required; ``is_active`` falls back
    to ``True`` when omitted.
    """
    # Required string, 3 to 50 characters long.
    username: str = Field(..., min_length=3, max_length=50, description="Username")
    email: EmailStr  # Built-in email validation
    age: int = Field(..., ge=0, le=150)  # Greater/equal 0, less/equal 150
    is_active: bool = True  # Default value

# Every field satisfies its constraint, so construction succeeds.
user = User(username="john_doe", email="john@example.com", age=30)
print("Valid user:", user, sep="\n")
print()

In [None]:
# A two-character username is below min_length=3, so validation fails.
try:
    invalid_user = User(username="ab", email="john@example.com", age=30)
except ValidationError as exc:
    print("Validation error:", exc, sep="\n")
    print()

In [None]:
# "not_an_email" is not a valid address, so the EmailStr field rejects it.
try:
    invalid_user = User(username="john_doe", email="not_an_email", age=30)
except ValidationError as exc:
    print("Invalid email error:", exc, sep="\n")
    print()

In [None]:
# age=200 exceeds the le=150 upper bound, so validation fails.
try:
    invalid_user = User(username="john_doe", email="john@example.com", age=200)
except ValidationError as exc:
    print("Age validation error:", exc, sep="\n")
    print()

## 2. Custom Validators

Use the `field_validator` decorator to add custom validation logic.

In [None]:
from pydantic import BaseModel, field_validator

class MLModel(BaseModel):
    """ML model record whose fields are checked by custom validators."""

    name: str
    accuracy: float

    @field_validator('accuracy')
    @classmethod
    def validate_accuracy(cls, v):
        """Return ``v`` unchanged if it lies in the closed interval [0, 1].

        Raises:
            ValueError: If ``v`` is outside [0, 1].
        """
        # Keep the chained comparison: it is also False for NaN, so NaN is
        # rejected along with out-of-range numbers.
        if 0 <= v <= 1:
            return v
        raise ValueError('Accuracy must be between 0 and 1')

    @field_validator('name')
    @classmethod
    def validate_name(cls, v):
        """Return ``v`` with surrounding whitespace removed.

        Raises:
            ValueError: If the stripped name is empty or longer than
                100 characters.
        """
        stripped = v.strip()
        if not stripped:
            raise ValueError('Model name cannot be empty')
        if len(stripped) > 100:
            raise ValueError('Model name must be max 100 characters')
        return stripped

# Both custom validators accept these values.
model = MLModel(name="Fraud Detector", accuracy=0.95)
print("Valid model:", model, sep="\n")
print()

In [None]:
# accuracy=1.5 is above 1, so validate_accuracy rejects it.
try:
    invalid = MLModel(name="Bad Model", accuracy=1.5)
except ValidationError as exc:
    print("Accuracy error:", exc, sep="\n")
    print()

In [None]:
# An empty name is rejected by validate_name.
try:
    invalid = MLModel(name="", accuracy=0.95)
except ValidationError as exc:
    print("Name error:", exc, sep="\n")
    print()

## 3. Nested Models

Use models within models for complex data structures.

In [None]:
from pydantic import BaseModel
from typing import List

class Metric(BaseModel):
    """A single named performance metric (both fields are required)."""
    name: str  # Metric label, e.g., 'accuracy', 'precision'
    value: float  # Metric value, e.g., 0.95

class ModelMetrics(BaseModel):
    """Collection of metrics for one model; nests ``Metric`` as a list field."""
    model_name: str  # Name of the model the metrics belong to
    metrics: List[Metric]  # List of Metric objects

# Build the parent model from a list of nested Metric instances.
model_metrics = ModelMetrics(
    model_name="Fraud Detector",
    metrics=[
        Metric(name=label, value=score)
        for label, score in (
            ("accuracy", 0.95),
            ("precision", 0.94),
            ("recall", 0.92),
        )
    ],
)
print("Nested models:", model_metrics, sep="\n")
print()
# Nested objects are reached with ordinary attribute and index access.
print(
    "Access nested data:",
    f"First metric: {model_metrics.metrics[0].name} = {model_metrics.metrics[0].value}",
    sep="\n",
)
print()

In [None]:
# Plain-dict form (useful for responses)
print("As dictionary:", model_metrics.model_dump(), sep="\n")
print()

# JSON string form (for API responses)
print("As JSON:", model_metrics.model_dump_json(indent=2), sep="\n")
print()

## 4. Optional Fields and Defaults

Make fields optional or provide default values.

In [None]:
from pydantic import BaseModel, Field
from typing import Optional
from datetime import datetime

class MLModel(BaseModel):
    """ML model record mixing required fields with optional/defaulted ones.

    Only ``name`` and ``framework`` must be supplied; every other field has
    a default.
    """
    # Required fields
    name: str = Field(..., description="Model name")
    framework: str = Field(..., description="ML framework")
    
    # Optional fields with defaults
    accuracy: Optional[float] = None  # May be omitted or None
    is_active: bool = True  # Default True
    description: str = Field(default="", description="Optional description")
    # The default comes from calling datetime.now() with no tz argument,
    # i.e. a naive local timestamp.
    created_at: datetime = Field(default_factory=datetime.now)

# Only the required fields are given; the rest take their defaults.
model1 = MLModel(name="Model 1", framework="sklearn")
print("Minimal creation:", model1, sep="\n")
print()

# Some of the defaulted fields are overridden explicitly.
model2 = MLModel(
    name="Model 2",
    framework="pytorch",
    accuracy=0.97,
    description="Advanced neural network",
)
print("With optional fields:", model2, sep="\n")
print()

## 5. Type Coercion

In its default (lax) mode, Pydantic automatically converts compatible input types, for example the string `"42"` to the integer `42`.

In [None]:
from pydantic import BaseModel

class Data(BaseModel):
    """Three required fields of different primitive types, used to show coercion."""
    count: int  # Integer field
    ratio: float  # Floating-point field
    is_valid: bool  # Boolean field

# Every argument is a string; each is converted to the annotated field type.
data1 = Data(count="42", ratio="3.14", is_valid="true")
print(
    "Type coercion from strings:",
    f"count: {data1.count} (type: {type(data1.count).__name__})",
    f"ratio: {data1.ratio} (type: {type(data1.ratio).__name__})",
    f"is_valid: {data1.is_valid} (type: {type(data1.is_valid).__name__})",
    sep="\n",
)
print()

In [None]:
# "not_a_number" cannot be converted to an int, so validation fails.
try:
    invalid = Data(count="not_a_number", ratio=3.14, is_valid=True)
except ValidationError as exc:
    print("Cannot coerce invalid type:", exc, sep="\n")
    print()

## 6. Enum Validation

Restrict values to specific choices using Enums.

In [None]:
from pydantic import BaseModel
from enum import Enum

class FrameworkEnum(str, Enum):
    """Valid ML frameworks.

    Mixing in ``str`` makes each member a string equal to its value.
    """
    SKLEARN = "sklearn"
    PYTORCH = "pytorch"
    TENSORFLOW = "tensorflow"
    XGBOOST = "xgboost"

class LifecycleEnum(str, Enum):
    """Model lifecycle stages.

    Mixing in ``str`` makes each member a string equal to its value.
    """
    DEVELOPMENT = "development"
    STAGING = "staging"
    PRODUCTION = "production"
    ARCHIVED = "archived"

class MLModel(BaseModel):
    """ML model record whose framework and lifecycle are restricted to enum values."""
    name: str  # Free-form model name
    framework: FrameworkEnum  # Must be one of the enum values
    lifecycle: LifecycleEnum  # Must be one of the LifecycleEnum values

# Enum members are passed directly.
model = MLModel(
    name="Good Model",
    framework=FrameworkEnum.SKLEARN,
    lifecycle=LifecycleEnum.PRODUCTION,
)
print("Valid enum usage:", model, sep="\n")
print()

In [None]:
# Plain strings matching an enum value are converted to the enum member.
model2 = MLModel(name="Another Model", framework="pytorch", lifecycle="staging")
print("Auto-converted from string:", model2, sep="\n")
print()

In [None]:
# "invalid_framework" is not a FrameworkEnum value, so validation fails.
try:
    invalid = MLModel(
        name="Bad Model",
        framework="invalid_framework",
        lifecycle="production",
    )
except ValidationError as exc:
    print("Invalid enum error:", exc, sep="\n")
    print()

## 7. Model Configuration

Control Pydantic behavior with the `model_config` class attribute.

In [None]:
from pydantic import BaseModel, ConfigDict

class StrictModel(BaseModel):
    """Model demonstrating ``model_config`` options.

    NOTE: despite the class name, this config does not set ``strict=True``,
    so strict mode is not enabled here. The config only strips whitespace
    from string values and validates default values.
    """
    model_config = ConfigDict(
        str_strip_whitespace=True,  # Automatically strip whitespace from strings
        validate_default=True,       # Validate default values
    )
    
    name: str  # Surrounding whitespace is stripped by the config above
    count: int

# Leading and trailing spaces in `name` are removed by str_strip_whitespace.
model = StrictModel(name="  John  ", count=42)
print("Whitespace stripped:", f"name: '{model.name}'", sep="\n")
print()

## 8. JSON Schema Generation

Pydantic automatically generates JSON Schema for API documentation.

In [None]:
from pydantic import BaseModel, Field
import json

class User(BaseModel):
    """User model whose constraints and descriptions feed the JSON Schema.

    All three fields are required. Each ``description`` is emitted in the
    generated schema, so it must agree with the constraint actually enforced.
    """
    # max_length=50 enforces the "3-50 chars" limit promised by the
    # description, so the schema also reports maxLength.
    username: str = Field(
        ..., min_length=3, max_length=50, description="Username (3-50 chars)"
    )
    email: str = Field(..., description="User email address")
    age: int = Field(..., ge=0, le=150, description="User age (0-150)")
# Generate JSON Schema
schema = User.model_json_schema()
print("JSON Schema (for API documentation):")
print(json.dumps(schema, indent=2))
print()

## Summary

**Pydantic provides:**
- ✅ Automatic type validation
- ✅ Field constraints (min/max, regex, custom validators)
- ✅ Type coercion (convert compatible types)
- ✅ Nested models for complex structures
- ✅ Enum validation for restricted values
- ✅ JSON Schema generation for API docs
- ✅ Optional and default values

**Best practices:**
1. Use Field() for constraints and documentation
2. Add field validators for business logic
3. Nest models for clear structure
4. Use Enums for restricted choices
5. Generate schemas for API documentation

In FastAPI, your Pydantic models become your API schema, so the API documentation is generated automatically.