In [42]:
import os
import re

import faiss
import numpy as np
import openai
import pandas as pd

In [43]:
# Read the OpenAI credential from the environment rather than hard-coding it.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# context_df: the service-request reference table used for retrieval.
context_df = pd.read_csv('./data/dc_service_requests.csv')
# questions_df: the resident questions file (not referenced by the cells shown here).
questions_df = pd.read_csv('./data/resident_request_questions.csv')

In [51]:
def get_embedding(text, model="text-embedding-ada-002"):
    """Embed a single piece of text and return the vector as a NumPy array.

    ``text`` is sent as a one-item batch to ``openai.embeddings.create`` using
    ``model``; the embedding of the first (only) returned item is converted
    with ``np.array``.
    """
    result = openai.embeddings.create(model=model, input=[text])
    first_item = result.data[0]
    return np.array(first_item.embedding)

In [None]:
# Preview the service-request reference table (request_type, department,
# resolution_estimate, Description) before any embeddings are built from it.
display(context_df)

Unnamed: 0,request_type,department,resolution_estimate,Description
0,Abandoned Bicycle,"DPW, DPW",20 bd,This service request is to be used for bicycle...
1,Abandoned Vehicle - On Private Property,"DPW, DPW",45 bd,Please use this service request to request the...
2,Abandoned Vehicle - On Public Property,"DPW, DPW",13 bd,Please use this service request to request the...
3,Alley Repair Investigation,DDOT,270 bd,Please use this service request type to invest...
4,Bee Treatment and Inspection (DOH),DOH,14 bd,Bee Treatment - This service request is limite...
...,...,...,...,...
82,Tree Inspection,DDOT,5 bd,Use this request type to report an urgent tree...
83,Tree Planting,DDOT,500 bd,Urban Forestry Administration (UFA) plants nea...
84,Tree Pruning,DDOT,180 bd,Please use this service type to request a publ...
85,Tree Removal,DDOT,180 bd,Please use this service type to request the re...


In [55]:
def res_estimate_helper(res_estimate):
    """Expand a resolution estimate such as ``"20 bd"`` into readable text.

    The input is expected to look like ``"<amount> <unit>"``. A unit of ``bd``
    becomes "business days"; any other unit becomes "calendar days".

    The function is idempotent: text it has already produced
    (``"20 business days"``) is returned unchanged instead of being relabelled
    as calendar days, so callers may safely apply it to values that were
    expanded earlier in the pipeline.

    Args:
        res_estimate: Estimate string, e.g. ``"20 bd"`` or ``"5 cd"``.

    Returns:
        ``"<amount> business days"`` or ``"<amount> calendar days"``.

    Raises:
        IndexError: If ``res_estimate`` has no unit token after the amount.
    """
    # Split once, on any whitespace, so stray spaces or a trailing newline do
    # not end up inside the unit token.
    parts = res_estimate.split()
    amount, unit = parts[0], parts[1]
    # 'business' is accepted alongside 'bd' so that already-expanded text
    # round-trips unchanged.
    if unit in ('bd', 'business'):
        return f"{amount} business days"
    return f"{amount} calendar days"

In [None]:
# Embed one text passage per row of context_df. Each passage combines the
# request type, department, expanded resolution estimate and description.
embedded_rows = []

for _, row in context_df.iterrows():
    combined_text = "".join([
        f"Request Type: {row['request_type']}\n",
        f"Department: {row['department']}\n",
        f"Resolution Estimate: {res_estimate_helper(row['resolution_estimate'])}\n",
        f"Description: {row['Description']}",
    ])
    embedded_rows.append(get_embedding(combined_text))

# Stack the per-row vectors into one array, in the same order as context_df.
all_embeddings = np.array(embedded_rows)

Request Type: Abandoned Bicycle
Department: DPW, DPW
Resolution Estimate: 20 business days
Description: This service request is to be used for bicycles that are left on public space and which are abandoned.


In [53]:
# Cast the embeddings to float32 before handing them to FAISS, and size the
# index from the width of the embedding matrix.
all_embeddings = all_embeddings.astype(np.float32)
embedding_dim = all_embeddings.shape[1]

# Flat L2-distance index holding one vector per row of context_df.
faiss_index = faiss.IndexFlatL2(embedding_dim)
faiss_index.add(all_embeddings)
# Persist the index so it can be reloaded with faiss.read_index later.
faiss.write_index(faiss_index, "dc_requests.faiss")

In [54]:
# Shortcut: load the index saved by faiss.write_index instead of re-embedding
# every row.
# NOTE(review): search results are mapped back to context_df by row position,
# so the saved file must have been built from the same rows in the same order
# as the current CSV; confirm the file is up to date.
faiss_index = faiss.read_index("dc_requests.faiss")

In [57]:
def search_dc_requests(query: str, top_k: int = 3):
    """Return the service-request rows closest to ``query`` in embedding space.

    The query is embedded with ``get_embedding`` and looked up in the
    module-level ``faiss_index``; every hit is mapped back to ``context_df``
    by row position.

    Args:
        query: Free-text question to match against the service requests.
        top_k: Maximum number of matches to return.

    Returns:
        A list of dicts in the order FAISS returned them, with keys
        ``request_type``, ``department``, ``resolution_estimate`` (already
        expanded by ``res_estimate_helper``, e.g. "3 business days"),
        ``description`` and ``distance`` (the FAISS distance as a float).
        The list is shorter than ``top_k`` when the index holds fewer vectors.
    """
    query_vec = get_embedding(query).astype(np.float32).reshape(1, -1)
    # Search FAISS index
    distances, indices = faiss_index.search(query_vec, top_k)

    results = []
    for dist, idx in zip(distances[0], indices[0]):
        # FAISS pads the result with -1 labels when fewer than top_k vectors
        # are available. Without this guard, iloc[-1] would silently return
        # the last row of context_df as if it were a real match.
        if idx < 0:
            continue
        row_data = context_df.iloc[idx]
        results.append({
            "request_type": row_data["request_type"],
            "department": row_data["department"],
            "resolution_estimate": res_estimate_helper(row_data["resolution_estimate"]),
            "description": row_data["Description"],
            "distance": float(dist),
        })
    return results

In [58]:
# Smoke-test retrieval with a sample question and print the raw match dicts
# (request type, department, expanded estimate, description, distance).
query = "How long does it take to fix a pothole?"
dc_matches = search_dc_requests(query, top_k=3)
print(dc_matches)

[{'request_type': 'Pothole', 'department': 'DDOT', 'resolution_estimate': '3 business days', 'description': 'Please use this request type for Pothole investigation. Pothole repairs normally take approximately 3 business days (72 hours), weather permitting, for completion.\n\n', 'distance': 0.23399491608142853}, {'request_type': 'Roadway Repair', 'department': 'DDOT', 'resolution_estimate': '270 business days', 'description': 'Please use this service request type to investigate street surface issues. Please provide the specific location (i.e. address, intersection) and describe the specific repair problem (i.e. uneven pavement, numerous potholes). Also if possible, provide any information regarding the street surfaces paving material (i.e. concrete, asphalt, or brick).\n\n', 'distance': 0.3189466595649719}, {'request_type': 'Alley Repair Investigation', 'department': 'DDOT', 'resolution_estimate': '270 business days', 'description': 'Please use this service request type to investigate a

In [111]:
def generate_response(user_query, top_k=5, model="gpt-3.5-turbo", temperature=0.3):
    """Answer a resident question using retrieved service-request context.

    Retrieves the ``top_k`` closest service requests with
    ``search_dc_requests``, formats them into a context block, and asks the
    chat model to answer ``user_query`` from that context.

    Args:
        user_query: The resident's question.
        top_k: Number of service requests to include as context.
        model: Chat-completion model name.
        temperature: Sampling temperature passed to the chat API.

    Returns:
        The content of the first choice returned by the chat API.
    """
    # 1. Retrieve relevant requests from context
    dc_results = search_dc_requests(user_query, top_k=top_k)

    # 2. Create context string
    context_lines = []
    for res in dc_results:
        context_lines.append(
            f"Request Type: {res['request_type']}\n"
            f"Department: {res['department']}\n"
            # search_dc_requests already expanded the estimate ("3 business
            # days"). Passing it through res_estimate_helper again relabelled
            # every business-day estimate as calendar days.
            f"Resolution Estimate: {res['resolution_estimate']}\n"
            f"Description: {res['description']}\n"
            f"Distance: {res['distance']}\n"
            "----"
        )
    context_str = "\n".join(context_lines)

    # 3. Build the final prompt
    SYSTEM_PROMPT = "You are an AI system that assists with Washington D.C. resident inquiries..."
    FINAL_PROMPT = (
        f"{SYSTEM_PROMPT}\n\n"
        f"Context from Washington D.C. service requests:\n{context_str}\n\n"
        f"User's question: {user_query}\n"
        # Trailing spaces keep the instruction sentences from running together.
        "Provide clear, concise, and legally compliant responses. "
        "If the answer isn't in the context, state that you're not sure. "
        "If you are uncertain about the answer, politely say so and prompt for clarification."
    )

    # 4. Call API
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": FINAL_PROMPT},
        ],
        temperature=temperature
    )
    answer = response.choices[0].message.content

    return answer

In [None]:
def evaluate_response_with_rules(query, ai_response, context_df):
    """
    Check an AI answer against the resolution estimates stored in ``context_df``.

    The three request types closest to ``query`` (via ``search_dc_requests``)
    are treated as candidates, and each one's estimate is looked up in
    ``context_df``. The answer passes as soon as one candidate is consistent
    with it: the estimate's amount appears in the answer as a standalone
    number, and the answer does not name the opposite unit ("calendar" for a
    business-day estimate, or vice versa). If no candidate is consistent, a
    conflict is reported together with a suggested correction.

    Args:
        query: The resident's question.
        ai_response: The generated answer to check (``None`` is treated as "").
        context_df: DataFrame with ``request_type`` and ``resolution_estimate``
            columns.

    Returns:
        A dict with keys ``potential_request_types`` (candidate names from the
        search), ``has_rule_conflict`` (bool), ``message`` (str) and
        ``suggested_fix`` (str, or ``None`` when there is no conflict).
    """
    evaluation_result = {
        "potential_request_types": None,
        "has_rule_conflict": False,
        "message": "",
        "suggested_fix": None
    }

    # 1. Retrieve the candidate request types for this query.
    search_matches = search_dc_requests(query, top_k=3)
    potential_request_types = [match['request_type'] for match in search_matches]
    evaluation_result["potential_request_types"] = potential_request_types

    # 2. Look up each candidate's estimate in context_df. Type and estimate are
    #    kept together as pairs so the two can never drift out of alignment.
    candidates = []
    request_type_lower = context_df["request_type"].str.lower()
    for request_type in potential_request_types:
        matching_rows = context_df[request_type_lower == request_type.lower()]
        if matching_rows.empty:
            # The caller's table does not contain this type; nothing to check.
            continue
        estimate = res_estimate_helper(matching_rows.iloc[0]["resolution_estimate"])
        candidates.append((request_type, estimate))

    # 3. Check if a known resolution estimate is mentioned in the AI answer.
    response_text = ai_response or ""
    response_lower = response_text.lower()
    opposite_unit = {"business": "calendar", "calendar": "business"}
    has_conflict = False
    error_idx = 0
    for i, (_, estimate) in enumerate(candidates):
        amount, unit = estimate.split(' ')[:2]
        # Match the amount only as a whole number so that "3" is not found
        # inside "30" or "13".
        amount_mentioned = re.search(rf"(?<!\d){re.escape(amount)}(?!\d)", response_text)
        if not amount_mentioned:
            has_conflict = True
            continue
        wrong_unit = opposite_unit.get(unit)
        if wrong_unit and wrong_unit in response_lower:
            # Right amount, wrong kind of days: remember which candidate.
            has_conflict = True
            error_idx = i
            continue
        has_conflict = False
        evaluation_result["message"] = (
            f"AI response includes the known resolution estimate ({estimate})."
        )
        break

    evaluation_result["has_rule_conflict"] = has_conflict
    if has_conflict:
        conflict_type, conflict_estimate = candidates[error_idx]
        evaluation_result["message"] = (
            f"AI response does not reference the known resolution estimate "
            f"({conflict_estimate})."
        )
        # 4. Suggest a fix. TODO: Implement re-prompting
        # The request type and the estimate are taken from the same candidate;
        # previously the type was always candidate 0 regardless of error_idx.
        corrected_answer = f"Fix: According to city data, requests related to {conflict_type} typically take {conflict_estimate}."
        evaluation_result["suggested_fix"] = corrected_answer

    return evaluation_result

In [97]:
# Run the rule check on a hand-written answer that omits the known estimate.
user_query = "How long does it take to fix a pothole?"
ai_response = "It usually gets handled within a week by the city."

eval_result = evaluate_response_with_rules(user_query, ai_response, context_df)

print("Evaluation Result:", eval_result, sep="\n")

if eval_result["has_rule_conflict"]:
    print(
        "\nFlagged Response: The AI did not mention the correct resolution time.",
        "Suggested Fix:",
        eval_result["suggested_fix"],
        sep="\n",
    )

Evaluation Result:
{'potential_request_types': ['Pothole', 'Roadway Repair', 'Alley Repair Investigation'], 'has_rule_conflict': True, 'message': 'AI response does not reference the known resolution estimate (3 business days).', 'suggested_fix': 'Fix: According to city data, Pothole typically takes 3 business days.'}

Flagged Response: The AI did not mention the correct resolution time.
Suggested Fix:
Fix: According to city data, Pothole typically takes 3 business days.


In [118]:
# End-to-end run: generate an answer, then check it against the known rules.
# Other queries to try:
#   "Do I need a permit to build a fence around my yard?"
#   "There is always trash left in front of my building. What can I do about this?"
user_query = "I started seeing lots of dead rats on my street. What can I do about this?"

ai_answer = generate_response(user_query)
eval_result = evaluate_response_with_rules(user_query, ai_answer, context_df)

for label, value in (
    ("User Query:", user_query),
    ("AI Answer:", ai_answer),
    ("Evaluation:", eval_result),
):
    print(label, value)

if eval_result["has_rule_conflict"]:
    print(
        "\nFlagged Response: The AI did not give the correct resolution time.",
        "Suggested Fix:",
        eval_result["suggested_fix"],
        sep="\n",
    )

User Query: I started seeing lots of dead rats on my street. What can I do about this?
AI Answer: If you are seeing lots of dead rats on your street, you can request a Dead Animal Collection through the Department of Public Works (DPW). This service is for the removal of dead animals on public property. The resolution estimate for this request is 2 calendar days. You can submit your request through the appropriate channels to have the dead rats removed promptly.
Evaluation: {'potential_request_types': ['Rodent Inspection and Treatment (Rat)', 'Dead Animal Collection', 'Rat Replacement Containers'], 'has_rule_conflict': True, 'message': 'AI response does not reference the known resolution estimate (2 business days).', 'suggested_fix': 'Fix: According to city data, requests related to Rodent Inspection and Treatment (Rat) typically take 2 business days.'}

Flagged Response: The AI did not give the correct resolution time.
Suggested Fix:
Fix: According to city data, requests related to Ro