# Day 27: AI-Powered Service Ownership Mapper

Automatically infer service ownership from commit, alert, and ticket data, then generate service catalog entries by clustering services on shared contributors (ML) and summarizing each one with an LLM.

In [13]:
# 📦 Import libraries
import pandas as pd
import numpy as np
import requests
from collections import defaultdict, Counter
from sklearn.metrics.pairwise import cosine_distances
from sklearn.cluster import SpectralClustering
from IPython.display import display, Markdown

# 📁 Load sample data
# One DataFrame per activity source, read in the order commits, alerts, tickets.
commits, alerts, tickets = [
    pd.read_csv(csv_path)
    for csv_path in ('commits.csv', 'alerts.csv', 'tickets.csv')
]

# 🧼 Normalize contributor and service names
def normalize(df, col):
    """Clean one text column in place: fill gaps, trim whitespace, lowercase.

    Missing values are replaced with ``'Unknown'``, which ends up as
    ``'unknown'`` after lowercasing. Values are cast to text before the
    string operations: the ``.str`` accessor would otherwise turn non-string
    entries (e.g. numeric IDs in a mixed column) back into NaN, and would
    raise on a purely numeric column.

    Args:
        df: DataFrame to modify; ``df[col]`` is overwritten.
        col: Name of the column to normalize.

    Returns:
        The same ``df`` object that was passed in.
    """
    # fillna must run before astype(str); otherwise NaN would become 'nan'.
    df[col] = df[col].fillna('Unknown').astype(str).str.strip().str.lower()
    return df

# Clean each source's person column and its 'service' column so the same
# name compares equal across commits, alerts, and tickets.
for frame, person_col in ((commits, 'author'), (alerts, 'responder'), (tickets, 'assignee')):
    for column in (person_col, 'service'):
        normalize(frame, column)

# 🧮 Create binary matrix: services × contributors
# Gather every service's contributors in a single pass per source. This
# replaces re-filtering all three DataFrames for each (service, person) cell.
contributors_by_service = defaultdict(set)
for frame, person_col in ((commits, 'author'), (alerts, 'responder'), (tickets, 'assignee')):
    for service, person in zip(frame['service'], frame[person_col]):
        contributors_by_service[service].add(person)

all_people = set(commits['author']).union(alerts['responder']).union(tickets['assignee'])
# Services come from all three sources (not only commits), so a service that
# shows up solely in alerts or tickets still gets a row in the matrix.
service_list = sorted(contributors_by_service)
people_list = sorted(all_people)

# service_matrix[i][j] is 1 when people_list[j] appears for service_list[i]
# in any of the three sources, otherwise 0.
service_matrix = np.array([
    [int(person in contributors_by_service[service]) for person in people_list]
    for service in service_list
])

# 🔗 Cluster services by contributor similarity
distance_matrix = cosine_distances(service_matrix)
# SpectralClustering(affinity='precomputed') treats its input as an affinity
# matrix where LARGER values mean MORE similar. Feeding it distances inverts
# the grouping, so convert cosine distance back to cosine similarity first.
# The clip guards against tiny negative values from floating-point error.
affinity_matrix = np.clip(1.0 - distance_matrix, 0.0, 1.0)
# Never request more clusters than there are services to cluster.
n_clusters = min(3, len(service_list))
clustering = SpectralClustering(
    n_clusters=n_clusters,
    affinity='precomputed',
    random_state=0,  # fixed seed so cluster labels are reproducible between runs
)
labels = clustering.fit_predict(affinity_matrix)
# Store plain ints rather than NumPy integers so the catalog stays easy to
# print and serialize.
service_clusters = {service: int(label) for service, label in zip(service_list, labels)}

# 🧠 Infer ownership using weighted contribution scores
# Placeholder that normalize() writes for missing names. It stands for
# "nobody recorded", so it must not earn points or be elected owner.
UNKNOWN_PERSON = 'unknown'

# (source DataFrame, person column, points awarded per row)
SCORE_SOURCES = (
    (commits, 'author', 1),
    (alerts, 'responder', 3),  # alerts = higher operational weight
    (tickets, 'assignee', 2),
)

ownership_catalog = {}
for service in service_list:
    scores = Counter()
    for frame, person_col, weight in SCORE_SOURCES:
        for person in frame.loc[frame['service'] == service, person_col]:
            if person != UNKNOWN_PERSON:
                scores[person] += weight
    if scores:
        # Ties go to whoever was scored first (commits, then alerts, then
        # tickets), because max() returns the first maximal key.
        main_owner = max(scores, key=scores.get)
        # Despite the key name, this is every other contributor to the
        # service, sorted by name; no recency information is used.
        recent_changes = sorted(set(scores) - {main_owner})
    else:
        # Only unattributed activity was found for this service.
        main_owner = UNKNOWN_PERSON
        recent_changes = []
    ownership_catalog[service] = {
        'main_owner': main_owner,
        'recent_changes': recent_changes,
        'cluster': service_clusters[service]
    }

# 🧠 Generate catalog entry using LLM (Ollama Llama 3)
def llm_catalog_entry(service, main_owner, recent_changes, cluster, purpose=None, timeout=120):
    """Ask a local Ollama ``llama3`` model to write a catalog entry.

    Builds a prompt from the ownership facts and posts it to the Ollama
    ``/api/generate`` endpoint on localhost with streaming disabled.

    Args:
        service: Service name to describe.
        main_owner: Name of the inferred primary owner.
        recent_changes: Iterable of other contributor names; an empty value
            is rendered as ``'None'`` in the prompt.
        cluster: Cluster label assigned to the service.
        purpose: Currently unused; kept for interface compatibility.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled server would block forever.

    Returns:
        The model's response text. If the reply has no ``"response"`` field
        the string ``"LLM failed to respond."`` is returned, and any failure
        (connection, timeout, HTTP error status, invalid JSON) is returned
        as ``"LLM error: <details>"`` instead of being raised.
    """
    contributors = ', '.join(recent_changes) if recent_changes else 'None'
    prompt = (
        f"Service Name: {service}\n"
        f"Maintainer: {main_owner}\n"
        f"Recent Contributors: {contributors}\n"
        f"Cluster Group: {cluster}\n\n"
        "Format the above service catalog entry using this exact structure:\n\n"
        f"Service: {service}\n\n"
        f"Main owner: {main_owner}\n\n"
        f"Recent contributors: {contributors}\n\n"
        f"Cluster group: {cluster}\n\n"
        "Summary:\n"
        "<One paragraph describing the service's role and its relationship to other services in the cluster. Avoid generic praise or repetition. Use factual, stakeholder-friendly language.>\n\n"
    )
    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": "llama3", "prompt": prompt, "stream": False},
            timeout=timeout,
        )
        # Report HTTP failures (e.g. model not installed) with their status
        # instead of masking them as a missing "response" field.
        response.raise_for_status()
        return response.json().get("response", "LLM failed to respond.")
    except Exception as e:
        # Deliberately broad: one failed entry must not abort the display
        # loop, so every failure is reported as text in place of the entry.
        return f"LLM error: {str(e)}"

# 📊 Display catalog entries in Markdown
for service_name, details in ownership_catalog.items():
    generated = llm_catalog_entry(
        service_name,
        details['main_owner'],
        details['recent_changes'],
        details['cluster'],
    )
    # The leading '---' renders as a horizontal rule between entries.
    display(Markdown(f"---\n{generated}\n"))

  _, diffusion_map = eigsh(


---
Here is the formatted service catalog entry:

Service: auth-service

Main owner: alice

Recent contributors: bob

Cluster group: 2

Summary:
The auth-service plays a critical role in securing access to our cluster's resources and services. As the primary authentication mechanism, it ensures that only authorized users can interact with other services, such as data-store and application-services. By providing robust identity management and access control, auth-service enables secure communication between services and protects sensitive data.


---
Here is the formatted service catalog entry:

Service: order-service

Main owner: dave

Recent contributors: carol

Cluster group: 1

Summary:
The order-service enables customers to place and manage orders within our e-commerce platform. It interacts with other services in the cluster, such as the product-service and inventory-service, to ensure accurate tracking of stock levels and availability. By providing a seamless ordering experience, this service plays a critical role in driving revenue growth and customer satisfaction across the organization.


---
Here is the formatted entry:

Service: payment-service

Main owner: carol

Recent contributors: alice

Cluster group: 0

Summary:
The payment-service enables secure and reliable financial transactions within our distributed system. As a critical component of our cluster's infrastructure, it facilitates seamless communication between various services, ensuring that payments are processed efficiently and accurately. By maintaining this service, we ensure the integrity of our system and provide a solid foundation for our users' financial interactions.
