In [None]:
# Provide the OpenAI API key through the process environment without storing it
# in the notebook. The previous `%env OPENAI_API_KEY="keyhere"` form kept the
# double quotes as part of the value, because %env does no shell-style unquoting.
import os
from getpass import getpass

# Reuse a key that is already exported; otherwise prompt for one (input is not echoed).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
%%bash
# Install the Argo Rollouts controller with Helm, then apply the demo Deployment,
# Service, Gateway and HTTPRoute in one `kubectl apply`.
# `%%bash` must be the first line of the cell so the Python kernel hands the cell to bash.
# NOTE(review): the Gateway uses gatewayClassName "istio"; this cell does not install
# Istio or the Gateway API CRDs, so they are assumed to exist already - TODO confirm.

# Stop at the first failing command so manifests are not applied after a failed install.
set -euo pipefail

helm upgrade --install argo-rollouts argo-rollouts \
  --repo https://argoproj.github.io/argo-helm \
  --version 2.37.6 \
  --namespace argo-rollouts \
  --create-namespace \
  --wait

# Quoted delimiter: the manifest is passed to kubectl verbatim, with no shell expansion.
kubectl apply -f- <<'EOF'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rollouts-demo
  labels:
    app: rollouts-demo
spec:
  replicas: 4
  selector:
    matchLabels:
      app: rollouts-demo
  template:
    metadata:
      labels:
        app: rollouts-demo
    spec:
      containers:
      - name: rollouts-demo
        image: argoproj/rollouts-demo:blue
        ports:
        - name: http
          containerPort: 8080
          protocol: TCP
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1
---
apiVersion: v1
kind: Service
metadata:
  name: rollouts-demo
  labels:
    app: rollouts-demo
spec:
  ports:
  - port: 80
    targetPort: http
    protocol: TCP
    name: http
  selector:
    app: rollouts-demo
---
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: gateway
spec:
  gatewayClassName: istio
  listeners:
  - name: default
    port: 80
    protocol: HTTP
    allowedRoutes:
      namespaces:
        from: All
---
kind: HTTPRoute
apiVersion: gateway.networking.k8s.io/v1beta1
metadata:
  name: rollouts-demo
spec:
  parentRefs:
    - name: gateway
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /
    backendRefs:
    - name: rollouts-demo
      kind: Service
      port: 80
EOF

In [2]:
from datetime import datetime
from typing import Dict, List, Optional

from autogen_agentchat.agents import AssistantAgent, UserProxyAgent
from autogen_agentchat.conditions import MaxMessageTermination, TextMentionTermination
from autogen_agentchat.teams import SelectorGroupChat
from autogen_agentchat.ui import Console
from autogen_ext.models.openai import OpenAIChatCompletionClient

# Import all required tools
from kagent.tools.argo import (
    ListRollouts,
    GetRollout,
    PauseRollout,
    PromoteRollout,
    SetRolloutImage,
    StatusRollout,
    VerifyArgoRolloutsControllerInstall,
    VerifyKubectlPluginInstall,
    GenerateResource,
)
from kagent.tools.k8s import (
    ApplyManifest,
    GetResources,
    GetPodLogs,
    PatchResource,
    DeleteResource,
)
from kagent.tools.prometheus import (
    QueryTool,
    QueryRangeTool,
    Config as PrometheusConfig,
    SeriesQueryTool,
    LabelNamesTool,
)

# Connection settings passed to each Prometheus tool attached to deployment_agent.
# Only the config name and the API base URL are set here.
PROMETHEUS_CONFIG = PrometheusConfig(name="prom_config", base_url="http://localhost:9090/api/v1")

# PromQL templates used for rollout analysis. Every "%s.%s" pair is a printf-style
# placeholder that analyze_rollout_metrics fills with (service, namespace).
# The success-rate template contains two such pairs; the other two contain one.
_SUCCESS_RATE_QUERY = """
        sum(rate(istio_requests_total{reporter="source", destination_service=~"%s.%s.svc.cluster.local", response_code!~"5.*"}[1m]))
        /
        sum(rate(istio_requests_total{reporter="source", destination_service=~"%s.%s.svc.cluster.local"}[1m])) * 100
        """
_LATENCY_P95_QUERY = """
        histogram_quantile(0.95, 
            sum(rate(istio_request_duration_milliseconds_bucket{reporter="source", destination_service=~"%s.%s.svc.cluster.local"}[1m])) by (le)
        )
        """
_REQUEST_RATE_QUERY = """
        sum(rate(istio_requests_total{reporter="source", destination_service=~"%s.%s.svc.cluster.local"}[1m]))
        """

# Metric name -> query template, threshold and duration label.
ANALYSIS_METRICS = {
    "success_rate": dict(query=_SUCCESS_RATE_QUERY, threshold=99.0, duration="5m"),
    "latency_p95": dict(query=_LATENCY_P95_QUERY, threshold=500, duration="5m"),  # 500ms
    "request_rate": dict(query=_REQUEST_RATE_QUERY, threshold=100, duration="5m"),  # requests per second
}

# System prompt for deployment_agent. It is an f-string, so the timestamp and the
# ANALYSIS_METRICS queries/thresholds are captured once, when this cell executes.
# The embedded queries still contain their raw "%s.%s" placeholders, so the prompt
# tells the model how to fill them in before running a query.
DEPLOYMENT_AGENT_SYSTEM_MESSAGE = f"""
You are a specialized agent combining Argo Rollouts deployment management with Prometheus monitoring capabilities.
Current time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

Core Capabilities:

1. Rollout Management:
   - Installation verification
   - Rollout creation and updates
   - Progressive delivery control
   - Deployment status monitoring

2. Metric Analysis:
   - Success rate monitoring
   - Latency analysis
   - Request rate tracking
   - SLO compliance verification

3. Deployment Automation:
   - Canary deployment management
   - Automated promotion/rollback
   - Analysis-based decisions
   - Traffic management

Standard Operating Procedures:

1. Pre-deployment Checks:
   - Verify Argo Rollouts installation
   - Check metric availability
   - Validate baseline metrics

2. Deployment Process:
   - Create/update rollout
   - Monitor canary metrics
   - Evaluate against thresholds
   - Manage promotion/rollback

3. Analysis Patterns:
   (Each `%s.%s` below is a placeholder: substitute `<service>.<namespace>` before running the query.)
```promql
# Success Rate
{ANALYSIS_METRICS["success_rate"]["query"]}

# P95 Latency
{ANALYSIS_METRICS["latency_p95"]["query"]}

# Request Rate
{ANALYSIS_METRICS["request_rate"]["query"]}
```

4. Decision Guidelines:
   - Success Rate > {ANALYSIS_METRICS["success_rate"]["threshold"]}%
   - P95 Latency < {ANALYSIS_METRICS["latency_p95"]["threshold"]}ms
   - Request Rate analysis based on baseline

Response Format:
```
Operation: <operation_type>
Status: <status>
Metrics:
  - Success Rate: <value>% (threshold: {ANALYSIS_METRICS["success_rate"]["threshold"]}%)
  - P95 Latency: <value>ms (threshold: {ANALYSIS_METRICS["latency_p95"]["threshold"]}ms)
  - Request Rate: <value> req/s
Analysis:
  <analysis_details>
Recommendation:
  <action_items>
```

Best Practices:
1. Always verify prerequisites
2. Monitor all key metrics
3. Make data-driven decisions
4. Document significant changes
5. Follow progressive delivery patterns
"""

# One OpenAI chat-completion client, reused by every agent and by the team below.
model_client = OpenAIChatCompletionClient(model="gpt-4o")

# Agent that combines the Argo Rollouts tools with the Prometheus query tools.
# Tool order matters: analyze_rollout_metrics picks the query tool with tools[-4],
# so the four Prometheus tools must stay at the end with QueryTool first.
argo_rollouts_tools = [
    ListRollouts(),
    GetRollout(),
    PauseRollout(),
    PromoteRollout(),
    SetRolloutImage(),
    StatusRollout(),
    VerifyArgoRolloutsControllerInstall(),
    VerifyKubectlPluginInstall(),
    GenerateResource(),
]
prometheus_tools = [
    tool_cls(config=PROMETHEUS_CONFIG)
    for tool_cls in (QueryTool, QueryRangeTool, SeriesQueryTool, LabelNamesTool)
]

deployment_agent = AssistantAgent(
    "deployment_agent",
    description="Combined Argo Rollouts and Prometheus monitoring specialist",
    tools=[*argo_rollouts_tools, *prometheus_tools],
    model_client=model_client,
    system_message=DEPLOYMENT_AGENT_SYSTEM_MESSAGE,
)

# System prompt for the Kubernetes agent: verify, manage and diagnose cluster resources.
K8S_AGENT_SYSTEM_MESSAGE = """
    You are a Kubernetes specialist agent responsible for cluster operations and resource verification.

    Key Responsibilities:
    1. Resource Verification:
        - Check existence of services, pods, and other resources
        - Verify resource state and configuration
        - Report detailed status of resources

    2. Resource Management:
        - Apply and modify Kubernetes manifests
        - Monitor resource changes
        - Verify successful application of changes

    3. Diagnostic Operations:
        - Retrieve pod logs
        - Check resource status
        - Verify service endpoints

    Always:
    - Confirm resource existence before operations
    - Provide detailed status reports
    - Verify changes after application
    - Report any issues or anomalies immediately
    """

# Agent with the generic Kubernetes tools: read resources and logs, apply, patch, delete.
k8s_tools = [
    GetResources(),
    GetPodLogs(),
    ApplyManifest(),
    PatchResource(),
    DeleteResource(),
]
k8s_agent = AssistantAgent(
    "k8s_agent",
    description="Kubernetes operations specialist",
    tools=k8s_tools,
    model_client=model_client,
    system_message=K8S_AGENT_SYSTEM_MESSAGE,
)


# Helper function for metric analysis during rollout
async def analyze_rollout_metrics(service: str, namespace: str = "default", duration: str = "5m") -> Dict:
    """
    Run every query in ANALYSIS_METRICS for one service and grade it against its threshold.

    Args:
        service: Service name substituted into each query template.
        namespace: Namespace substituted next to the service name.
        duration: Not used by the current implementation (the templates hard-code
            their own rate window); kept so existing callers keep working.

    Returns:
        A dict keyed by metric name. A graded entry holds "value", "threshold" and
        "status" ("pass" or "fail"). If building or running a query raises, the
        entry holds "error", "threshold" and "status" set to "error" instead, and
        the remaining metrics are still evaluated.
    """
    # Metrics where a smaller reading is the healthy one; this matches the
    # "P95 Latency < threshold" guideline in the deployment agent's prompt.
    lower_is_better = {"latency_p95"}

    results = {}
    for metric_name, config in ANALYSIS_METRICS.items():
        threshold = config["threshold"]
        try:
            template = config["query"]
            # Templates hold a varying number of "%s.%s" (service.namespace) pairs,
            # so supply exactly as many arguments as the template asks for.
            pair_count = template.count("%s") // 2
            query = template % ((service, namespace) * pair_count)
            # NOTE(review): tools[-4] assumes QueryTool is fourth from the end of the
            # agent's tool list and that run() accepts the raw query - TODO confirm.
            result = await deployment_agent.tools[-4].run(query)  # Using QueryTool
            if metric_name in lower_is_better:
                passed = result < threshold
            else:
                passed = result >= threshold
            results[metric_name] = {
                "value": result,
                "threshold": threshold,
                "status": "pass" if passed else "fail",
            }
        except Exception as e:
            # Best effort: record the failure for this metric and move on to the next.
            results[metric_name] = {"error": str(e), "threshold": threshold, "status": "error"}
    return results


# Planner that splits a task into subtasks and delegates them; it is given no tools.
# The team roster in its prompt must name the agents that are actually in the team
# (deployment_agent and k8s_agent), otherwise it delegates to an agent that does not exist.
planning_agent = AssistantAgent(
    "PlanningAgent",
    description="An agent for planning tasks, this agent should be the first to engage when given a new task.",
    model_client=model_client,
    system_message="""
    You are a planning agent.
    Your job is to break down complex tasks into smaller, manageable subtasks that can be executed by the team members. DO NOT MAKE UP ADDITIONAL AND UNNECESSARY SUBTASKS.
    Your team members are:
        deployment_agent: Performs Argo Rollouts deployment management and Prometheus metric analysis tasks.
        k8s_agent: Performs Kubernetes tasks.

    You only plan and delegate tasks - you do not execute them yourself.

    When assigning tasks, use this format:
    1. <agent> : <task>

    After all tasks are complete, summarize the findings and end with "TERMINATE".
    """,
)

# Create team
team = SelectorGroupChat(
    [planning_agent, deployment_agent, k8s_agent],
    model_client=model_client,
    termination_condition=TextMentionTermination("TERMINATE") | MaxMessageTermination(max_messages=25),
    allow_repeated_speaker=True,
)

# Examples:
# task = "Create an Argo Rollout to deploy a new version of the demo application with the color purple using the Kubernetes Gateway API in my cluster."
task = "Check if there are any argo rollout in the cluster in the process of promotion?"
# task = "Use the Kubernetes Gateway API and Argo Rollouts to create rollout resources for the canary and stable services for the demo application in my cluster."
# task = "Check if the Argo Rollouts controller is running and in a healthy state in the cluster?"

await Console(team.run_stream(task=task))

---------- user ----------
Check if there are any argo rollout in the cluster in the process of promotion?
---------- deployment_agent ----------
[FunctionCall(id='call_uth6SlarU7wDbqCyargxBXwT', arguments='{"ns":null}', name='list_rollouts')]
---------- deployment_agent ----------
[FunctionExecutionResult(content='NAMESPACE  NAME                  STRATEGY   STATUS        STEP  SET-WEIGHT  READY  DESIRED  UP-TO-DATE  AVAILABLE\ndefault    rollouts-demo         Canary     Degraded      -     50          0/0    10       0           0        \ngwtest     rollouts-demo-gwtest  Canary     Degraded      -     50          0/0    10       0           0        \n', call_id='call_uth6SlarU7wDbqCyargxBXwT')]
---------- deployment_agent ----------
NAMESPACE  NAME                  STRATEGY   STATUS        STEP  SET-WEIGHT  READY  DESIRED  UP-TO-DATE  AVAILABLE
default    rollouts-demo         Canary     Degraded      -     50          0/0    10       0           0        
gwtest     rollouts-demo-g



In [None]:
# Dump the team's component definition and print it as indented JSON.
team_component = team.dump_component()
print(team_component.model_dump_json(indent=2))