In [None]:
# !pip install oracledb openai google-generativeai

In [None]:
# ====================================================================
# CONFIGURATION CELL - SET ALL VARIABLES HERE
# ====================================================================

import getpass
import os

# =========================
# USER CONFIGURATION
# =========================

# API Configuration
# Prompts interactively (input is not echoed) and stores the key in the
# process environment; the setup section further down reads it back with
# os.getenv('GOOGLE_API_KEY').
os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google Gemini API key: ")

# Package Configuration - ADD YOUR PACKAGES HERE
# Each name is processed in order by run_package_comparison(); names are
# upper-cased before being looked up in ALL_SOURCE.
PACKAGES_TO_PROCESS = [
    "GIN_STP_PKG",
    "GIN_ACCOUNTS_PKG",
]

# Database Configurations - MODIFY THESE FOR YOUR DATABASES
# process_single_package() reads exactly the keys "DATABASE_A" and
# "DATABASE_B". Diffs are generated from A to B, and components that exist
# only in B are copied into the merged output.
# NOTE(review): credentials are stored in plain text here; consider prompting
# with getpass or reading them from the environment before sharing this file.
DATABASE_CONFIGS = {
    "DATABASE_A": { 
        "host": "10.176.18.91",
        "port": 1522,
        "service_name": "HERITAGE19C",
        "username": "TQ_GIS",
        "password": "TQ_GIS"
    },
    "DATABASE_B": { 
        "host": "10.176.18.110",
        "port": 1521,
        "service_name": "NEW_GEMINIA",
        "username": "TQ_GIS",
        "password": "TQ_GIS"
    }
}

# Output Directories (relative to the current working directory)
BASE_DIRECTORY = 'packages'                       # parsed components per package/database
DIFFS_DIRECTORY = 'diffs'                         # unified diffs and differences.json
REPORTS_DIRECTORY = 'reports'                     # NOTE(review): not referenced by the code shown in this file
OUTPUT_DIRECTORY = 'output'                       # one merged_<name>.sql per routine
CONSOLIDATED_DIRECTORY = 'consolidated_packages'  # final <PACKAGE>_MERGED.sql files

# Logging Configuration - ERROR will only show errors, INFO shows all operations
LOG_LEVEL = 'ERROR'  # Change to 'INFO' if you want detailed logging

# ====================================================================
# IMPORTS AND SETUP
# ====================================================================

import oracledb as cx_Oracle
import re
import json
import logging
import difflib
import shutil
import google.generativeai as genai
from datetime import datetime

# Configure logging
# An unrecognised LOG_LEVEL name falls back to logging.ERROR instead of raising.
logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper(), logging.ERROR), 
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Configure Google Gemini
# The key is read back from the environment variable populated by the
# configuration section; an empty value aborts here, before any database or
# API work is attempted.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("Google Gemini API key not found. Please set the 'GOOGLE_API_KEY' environment variable.")
genai.configure(api_key=GOOGLE_API_KEY)

# ====================================================================
# CORE FUNCTIONS
# ====================================================================

def clean_sql_content(content: str) -> str:
    """Strip Markdown code fences and the merge sentinel from SQL text.

    Model replies typically wrap the code in a fenced block and end with the
    ``FINAL_MERGING_DONE`` marker. Only the *outer* wrapping is removed: an
    opening fence at the very start (with an optional ``sql``/``plsql`` tag)
    and any combination of closing fence and sentinel at the very end, in
    either order. Text in the middle of the content is never touched.

    Args:
        content: Raw text, possibly fenced and/or terminated by the sentinel.

    Returns:
        The cleaned text with surrounding whitespace removed; ``""`` for
        empty input.
    """
    if not content:
        return ""

    # Strip first so leading blank lines or spaces cannot hide the fence.
    cleaned = content.strip()
    cleaned = re.sub(r'^```(?:pl/?sql|sql)?\s*', '', cleaned, flags=re.IGNORECASE)

    # The sentinel may sit inside or after the closing fence, so keep peeling
    # both off the tail until nothing changes. Each pass only shortens the
    # text, so the loop always terminates.
    previous = None
    while previous != cleaned:
        previous = cleaned
        cleaned = re.sub(r'\s*FINAL_MERGING_DONE\s*$', '', cleaned, flags=re.IGNORECASE)
        cleaned = re.sub(r'\s*```\s*$', '', cleaned)

    return cleaned.strip()

def get_package_source(db_params, package_name, object_type='PACKAGE BODY'):
    """Retrieve the source text of a stored object from ALL_SOURCE.

    Args:
        db_params: Mapping with the keys ``host``, ``port``, ``service_name``,
            ``username`` and ``password``.
        package_name: Object name; upper-cased before the lookup.
        object_type: ALL_SOURCE ``type`` value; upper-cased before the lookup.

    Returns:
        The source lines concatenated in line order, or ``""`` when the
        object is not found or a database error occurs (the error is logged,
        not raised).
    """
    logging.info(f"Connecting to database {db_params['service_name']} to retrieve {object_type} '{package_name}'.")

    # Bind variables keep the object name and type out of the SQL text, so
    # quoting problems and SQL injection are impossible.
    # NOTE(review): there is no OWNER filter; if several schemas visible to
    # this user contain an object with the same name, their lines will be
    # interleaved. Confirm that this cannot happen in the target databases.
    query = """
        SELECT text
        FROM all_source
        WHERE name = :pkg_name
        AND type = :obj_type
        ORDER BY line
        """

    source = ""
    conn = None
    cursor = None
    try:
        dsn_tns = cx_Oracle.makedsn(
            db_params['host'],
            db_params['port'],
            service_name=db_params['service_name']
        )
        conn = cx_Oracle.connect(
            user=db_params['username'],
            password=db_params['password'],
            dsn=dsn_tns
        )
        cursor = conn.cursor()
        cursor.execute(
            query,
            {"pkg_name": package_name.upper(), "obj_type": object_type.upper()},
        )
        # Guard against NULL text rows so the join cannot fail on None.
        source = ''.join(row[0] or '' for row in cursor.fetchall())

        logging.info(f"Retrieved {len(source)} characters of source code from {db_params['service_name']}.")

    except cx_Oracle.DatabaseError as e:
        # Covers connection failures as well as query errors.
        logging.error(
            f"Database error while retrieving {object_type} '{package_name}' "
            f"from {db_params['service_name']}: {e}"
        )
        source = ""
    finally:
        # Close only what was actually opened; a failed close must not mask
        # the result or the original error.
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except cx_Oracle.Error as close_error:
                logging.debug(f"Ignoring error while closing database resource: {close_error}")

    return source

def parse_package_components(source_code):
    """Split PL/SQL package source into named components using regexes.

    This is a heuristic, regex-based splitter, not a PL/SQL parser:

    * A procedure/function runs from its keyword up to the next standalone
      ``PROCEDURE``/``FUNCTION`` keyword (or the end of the text), so the
      last routine also carries the package's trailing ``END``.
    * Components that share a name (overloads, forward declarations)
      overwrite each other; the last occurrence wins.
    * Cursors, types and variables are searched only in the text between the
      first ``IS``/``AS`` and the first ``BEGIN``. The variable pattern is
      loose and may also pick up fragments of other declarations.

    Args:
        source_code: Package source text.

    Returns:
        Dict with the keys ``procedures``, ``functions``, ``cursors``,
        ``types`` and ``variables``; each maps a component name to its
        stripped source text.
    """
    logging.info("Parsing package components.")

    components = {
        'procedures': {},
        'functions': {},
        'cursors': {},
        'types': {},
        'variables': {},
    }

    def routine_pattern(keyword):
        # The parameter list is optional so parameterless routines are found;
        # \b keeps keywords inside identifiers (e.g. GET_FUNCTION_ID) from
        # starting or ending a match.
        return re.compile(
            rf"""
            \b{keyword}\s+([\w$]+)\s*
            (?:\(.*?\))?\s*
            (.*?)
            (?=\bPROCEDURE\b|\bFUNCTION\b|\Z)
            """,
            re.IGNORECASE | re.DOTALL | re.VERBOSE
        )

    for comp_type, keyword in (('procedures', 'PROCEDURE'), ('functions', 'FUNCTION')):
        for match in routine_pattern(keyword).finditer(source_code):
            components[comp_type][match.group(1)] = match.group(0).strip()

    # Declarations
    decl_match = re.search(r'\b(IS|AS)\s+(.*?)\s+BEGIN\b', source_code, re.IGNORECASE | re.DOTALL)
    if decl_match:
        decl = decl_match.group(2)
        cursor_pattern = re.compile(
            r'\bCURSOR\s+([\w$]+)\s*(?:\(.*?\))?\s*(IS|AS)\s+(.*?);',
            re.IGNORECASE | re.DOTALL
        )
        type_pattern = re.compile(r'\bTYPE\s+([\w$]+)\s+(IS|AS)\s+(.*?);', re.IGNORECASE | re.DOTALL)
        # The original pattern had "(NOT\s+NULL)?*", which is an invalid
        # double quantifier (re.error: multiple repeat). The initializer now
        # stops at the first ';' instead of greedily consuming the rest of
        # the declaration section.
        variable_pattern = re.compile(
            r'(\w+)\s+(CONSTANT\s+)?[\w%\.]+(\([\d\s,]*\))?\s*(NOT\s+NULL)?\s*(:=\s*[^;]*)?\s*;',
            re.IGNORECASE
        )
        for m in cursor_pattern.finditer(decl):
            components['cursors'][m.group(1)] = m.group(0).strip()
        for m in type_pattern.finditer(decl):
            components['types'][m.group(1)] = m.group(0).strip()
        for m in variable_pattern.finditer(decl):
            components['variables'][m.group(1)] = m.group(0).strip()
    return components

def save_components_to_disk(components, package_name, base_directory=BASE_DIRECTORY):
    """Write every component to ``<base>/<package>/<type>/<name>.sql``.

    A directory is created for each component type, even an empty one. Any
    character in a component name that is not alphanumeric, a space, ``_``
    or ``-`` is replaced with ``_`` in the file name.
    """
    allowed_punctuation = ' _-'
    package_root = os.path.join(base_directory, package_name)
    os.makedirs(package_root, exist_ok=True)

    for kind in components:
        kind_dir = os.path.join(package_root, kind)
        os.makedirs(kind_dir, exist_ok=True)

        for component_name, body in components[kind].items():
            sanitized = []
            for ch in component_name:
                keep = ch.isalnum() or ch in allowed_punctuation
                sanitized.append(ch if keep else '_')
            target = os.path.join(kind_dir, ''.join(sanitized) + ".sql")

            with open(target, 'w', encoding='utf-8') as handle:
                handle.write(body)

def save_components_as_json(components, package_name, base_directory=BASE_DIRECTORY):
    """Dump the components mapping to ``<base>/<package>/<package>_components.json``.

    The package directory is created when missing; the JSON is written as
    UTF-8 with a four-space indent.
    """
    target_dir = os.path.join(base_directory, package_name)
    os.makedirs(target_dir, exist_ok=True)

    json_path = os.path.join(target_dir, package_name + "_components.json")
    with open(json_path, 'w', encoding='utf-8') as out:
        json.dump(components, out, indent=4)

def compare_components(components1, components2, package_name):
    """Compare two parsed component sets and write a unified diff per change.

    For every component type in ``components1`` the names are classified as
    added (only in ``components2``), removed (only in ``components1``) or
    modified (in both, but with different stripped line content). Each
    modified component gets a diff file
    ``<DIFFS_DIRECTORY>/<package>/<type>_<name>_diff.txt``.

    Args:
        components1: Components of the first database (the diff "from" side).
        components2: Components of the second database (the diff "to" side).
        package_name: Used for the output directory and the diff headers.

    Returns:
        ``{comp_type: {'added': [...], 'removed': [...], 'modified': [...]}}``
        with every list sorted by name, so the report is reproducible
        between runs.
    """
    differences = {}
    diffs_output_dir = os.path.join(DIFFS_DIRECTORY, package_name)
    os.makedirs(diffs_output_dir, exist_ok=True)

    for comp_type, items_a in components1.items():
        # A type missing on the second side is treated as empty, not an error.
        items_b = components2.get(comp_type, {})
        names_a, names_b = set(items_a), set(items_b)

        modified = []
        for name in sorted(names_a & names_b):
            lines_a = items_a[name].strip().splitlines()
            lines_b = items_b[name].strip().splitlines()
            if lines_a == lines_b:
                continue
            modified.append(name)
            diff = difflib.unified_diff(
                lines_a, lines_b,
                fromfile=f'{package_name}_DBA_{comp_type}_{name}.sql',
                tofile=f'{package_name}_DBB_{comp_type}_{name}.sql',
                lineterm=''
            )
            diff_path = os.path.join(diffs_output_dir, f'{comp_type}_{name}_diff.txt')
            with open(diff_path, 'w', encoding='utf-8') as f:
                f.write('\n'.join(diff))

        differences[comp_type] = {
            'added': sorted(names_b - names_a),
            'removed': sorted(names_a - names_b),
            'modified': modified,
        }
    return differences

def merge_database_package_procedures_with_history(differences_file_path, heritage_package_procedure, geminia_package_procedure, output_package_path):
    """Merge two versions of a PL/SQL routine with Gemini and save the result.

    The diff and both source files are sent in a single prompt. While the
    reply does not contain the ``FINAL_MERGING_DONE`` sentinel, the model is
    asked to continue within the same chat session, which keeps the
    conversation history itself. The collected chunks are joined, cleaned
    with ``clean_sql_content`` and written to ``output_package_path``.

    Args:
        differences_file_path: Unified diff between the two versions
            (given to the model as context only).
        heritage_package_procedure: Path to the Database A version.
        geminia_package_procedure: Path to the Database B version.
        output_package_path: Where the merged SQL is written; parent
            directories are created as needed.

    Returns:
        ``output_package_path``.

    Raises:
        RuntimeError: If the model never emits the sentinel within the round
            limit. Nothing is written in that case, so a truncated routine
            cannot end up in the consolidated package.
    """
    # Upper bound on continuation requests so a reply that never contains the
    # sentinel cannot loop (and bill) forever.
    max_rounds = 25
    sentinel = re.compile(r'FINAL_MERGING_DONE', re.IGNORECASE)

    generation_config = {"temperature": 0.2, "top_p": 0.95, "max_output_tokens": 8192, "response_mime_type": "text/plain"}
    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=generation_config,
        system_instruction="You are a senior PL/SQL developer. Merge procedures and functions. End with FINAL_MERGING_DONE.",
    )

    # The component and diff files are written as UTF-8 elsewhere in this file.
    with open(differences_file_path, encoding="utf-8") as f:
        diff_content = f.read()
    with open(heritage_package_procedure, encoding="utf-8") as f:
        heritage_content = f.read()
    with open(geminia_package_procedure, encoding="utf-8") as f:
        geminia_content = f.read()

    base_prompt = f"""
    Merge two PL/SQL procedures.
    Diff (context only): {diff_content}
    Database A: {heritage_content}
    Database B: {geminia_content}
    Write FINAL_MERGING_DONE at the end.
    """

    # Send the prompt as the first message of a fresh session, so user and
    # model turns alternate and no "continue" message is duplicated.
    chat_session = model.start_chat()
    merged_chunks = []
    next_message = base_prompt
    for _ in range(max_rounds):
        response = chat_session.send_message(next_message)
        chunk = response.text.strip()
        if sentinel.search(chunk):
            # Detection and removal use the same case-insensitive pattern.
            merged_chunks.append(sentinel.sub("", chunk).strip())
            break
        merged_chunks.append(chunk)
        next_message = "Continue."
    else:
        raise RuntimeError(
            f"Gemini did not finish merging '{geminia_package_procedure}' "
            f"within {max_rounds} rounds (FINAL_MERGING_DONE never received)."
        )

    # NOTE(review): chunks are joined with a blank line; if a reply was cut
    # off mid-statement this inserts a line break at the cut. Review merged
    # output that needed more than one round.
    merged_content = clean_sql_content("\n\n".join(merged_chunks))
    os.makedirs(os.path.dirname(output_package_path) or ".", exist_ok=True)
    with open(output_package_path, "w", encoding="utf-8") as f:
        f.write(merged_content)
    return output_package_path

def merge_all_database_procedures_and_functions(package_name, differences_file, heritage_path, geminia_path, output_dir):
    """Produce one ``merged_<name>.sql`` per changed procedure or function.

    * ``modified`` routines are merged with Gemini from both versions.
    * ``added`` routines (present only in Database B) are copied after
      cleaning with ``clean_sql_content``.
    * ``removed`` routines (present only in Database A) and routines that
      are identical in both databases are not written by this function.

    Args:
        package_name: Package being processed; selects the diff and output
            sub-directories.
        differences_file: Path to the JSON written from
            ``compare_components``.
        heritage_path: Directory holding the Database A components.
        geminia_path: Directory holding the Database B components.
        output_dir: Root directory for the merged files.

    Returns:
        List of paths of the files that were written.
    """
    with open(differences_file, encoding="utf-8") as f:
        differences = json.load(f)
    package_output_dir = os.path.join(output_dir, package_name)
    os.makedirs(package_output_dir, exist_ok=True)
    merged_files = []

    def component_file(base_path, comp_type, name):
        # Must mirror the file-name sanitising in save_components_to_disk;
        # otherwise routines whose name contains '$' are never found.
        safe_name = ''.join(c if c.isalnum() or c in ' _-' else '_' for c in name)
        return os.path.join(base_path, comp_type, f"{safe_name}.sql")

    for comp_type, diff_data in differences.items():
        if comp_type not in ['procedures', 'functions']:
            continue

        # modified → Gemini
        for name in diff_data.get('modified', []):
            diff_path = os.path.join(DIFFS_DIRECTORY, package_name, f"{comp_type}_{name}_diff.txt")
            db_a_file = component_file(heritage_path, comp_type, name)
            db_b_file = component_file(geminia_path, comp_type, name)
            out_file = os.path.join(package_output_dir, f"merged_{name}.sql")
            missing = [p for p in (diff_path, db_a_file, db_b_file) if not os.path.exists(p)]
            if missing:
                # Previously skipped silently, which hid dropped routines.
                logging.warning(f"Skipping modified {comp_type} '{name}': missing input file(s) {missing}")
                continue
            merged_files.append(
                merge_database_package_procedures_with_history(diff_path, db_a_file, db_b_file, out_file)
            )

        # added → copy DB_B
        for name in diff_data.get('added', []):
            db_b_file = component_file(geminia_path, comp_type, name)
            out_file = os.path.join(package_output_dir, f"merged_{name}.sql")
            if not os.path.exists(db_b_file):
                logging.warning(f"Skipping added {comp_type} '{name}': {db_b_file} not found")
                continue
            with open(db_b_file, encoding="utf-8") as src:
                cleaned = clean_sql_content(src.read())
            with open(out_file, "w", encoding="utf-8") as dst:
                dst.write(cleaned)
            merged_files.append(out_file)
    return merged_files

def consolidate_merged_procedures_and_functions(package_name, merged_files_list, output_dir):
    """Concatenate the merged routine files into one package-body script.

    The files are processed in sorted path order. Each non-empty file is
    cleaned with ``clean_sql_content``, indented by two spaces and preceded
    by a banner carrying the routine name; missing or empty files are
    skipped. The script is wrapped in ``CREATE OR REPLACE PACKAGE BODY ...
    AS`` / ``END;`` followed by ``/``.

    Args:
        package_name: Name used in the header, the CREATE statement and the
            output file name.
        merged_files_list: Paths of the ``merged_<name>.sql`` files.
        output_dir: Directory for ``<package_name>_MERGED.sql``; created when
            missing.

    Returns:
        Path of the consolidated file.
    """
    os.makedirs(output_dir, exist_ok=True)
    consolidated_path = os.path.join(output_dir, f"{package_name}_MERGED.sql")
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    header = f"""-- =====================================================
-- Package: {package_name}
-- Generated: {timestamp}
-- Description: Merged procedures and functions from Database A and B
-- =====================================================

CREATE OR REPLACE PACKAGE BODY {package_name} AS

"""
    parts = [header]
    for file_path in sorted(merged_files_list):
        if not os.path.exists(file_path):
            continue
        # errors="replace" keeps one badly encoded file from aborting the run.
        with open(file_path, encoding="utf-8", errors="replace") as f:
            cleaned = clean_sql_content(f.read())
        if not cleaned.strip():
            continue

        # Remove only the leading "merged_" and the trailing extension; a
        # blanket str.replace would also mangle names such as
        # "get_merged_total".
        name = os.path.splitext(os.path.basename(file_path))[0]
        if name.startswith('merged_'):
            name = name[len('merged_'):]

        parts.append(f"\n  -- {'-'*50}\n  -- {name.upper()}\n  -- {'-'*50}\n\n")
        parts.append(
            '\n'.join('  ' + line if line.strip() else line for line in cleaned.splitlines())
            + "\n\n"
        )
    parts.append("END;\n/")

    with open(consolidated_path, "w", encoding="utf-8") as f:
        f.write(''.join(parts))
    return consolidated_path

def process_single_package(package_name):
    """Run the full fetch → parse → diff → merge → consolidate workflow.

    Args:
        package_name: Name of the package to fetch from both databases.

    Returns:
        ``False`` when the source could not be retrieved from either
        database, ``True`` otherwise (including when nothing needed merging).
    """
    print(f"\n=== PROCESSING PACKAGE: {package_name} ===")
    src_a = get_package_source(DATABASE_CONFIGS['DATABASE_A'], package_name)
    src_b = get_package_source(DATABASE_CONFIGS['DATABASE_B'], package_name)
    if not src_a or not src_b:
        print(f"Failed to retrieve package {package_name}")
        return False

    comps_a, comps_b = parse_package_components(src_a), parse_package_components(src_b)

    # One directory per database under BASE_DIRECTORY.
    dir_name_a = package_name + "_DATABASE_A_BODY"
    dir_name_b = package_name + "_DATABASE_B_BODY"
    save_components_to_disk(comps_a, dir_name_a)
    save_components_to_disk(comps_b, dir_name_b)
    save_components_as_json(comps_a, dir_name_a)
    save_components_as_json(comps_b, dir_name_b)

    differences = compare_components(comps_a, comps_b, package_name)
    diff_file = os.path.join(DIFFS_DIRECTORY, package_name, "differences.json")
    with open(diff_file, "w", encoding="utf-8") as f:
        json.dump(differences, f, indent=4)

    # Derive the component locations from BASE_DIRECTORY (the default the
    # save functions just used), so changing the configuration cannot point
    # the merge step at a different folder than the one written above.
    merged_files = merge_all_database_procedures_and_functions(
        package_name, diff_file,
        os.path.join(BASE_DIRECTORY, dir_name_a),
        os.path.join(BASE_DIRECTORY, dir_name_b),
        OUTPUT_DIRECTORY
    )
    if merged_files:
        consolidated = consolidate_merged_procedures_and_functions(package_name, merged_files, CONSOLIDATED_DIRECTORY)
        print(f"Consolidated package created: {consolidated}")
    else:
        print(f"No differences to merge for {package_name}")
    return True

def run_package_comparison():
    """Process every package in ``PACKAGES_TO_PROCESS`` and report failures.

    A package counts as failed when ``process_single_package`` returns a
    falsy value or raises. An exception is logged with its traceback and the
    remaining packages are still processed, so one bad package does not
    abort the whole batch.
    """
    print("Starting Package Comparison and Merging Tool")
    failed = []
    for pkg in PACKAGES_TO_PROCESS:
        try:
            succeeded = process_single_package(pkg)
        except Exception:
            # Top-level boundary: record the failure and keep going.
            logging.exception(f"Unexpected error while processing package {pkg}")
            succeeded = False
        if not succeeded:
            failed.append(pkg)
    if failed:
        print(f"Packages that could not be processed: {', '.join(failed)}")
    print("Processing complete!")


In [None]:
# ====================================================================
# EXECUTION CELL - RUN THIS TO START THE PROCESS
# ====================================================================

# Runs the whole workflow for every entry in PACKAGES_TO_PROCESS; the
# configuration cell must have been executed first.
run_package_comparison()