In [None]:
import getpass
import os

# Prompt for API keys
# The two prompts below are disabled. NOTE: `getpass` is imported as a module in this
# cell, so re-enabling them as written would fail; they would need `getpass.getpass(...)`.
#LLAMA_CLOUD_API_KEY = getpass("Enter your LLAMA CLOUD API key: ")
#ANTHROPIC_API_KEY = getpass("Enter your Anthropic API key: ")
# Read the Google API key without echoing it and publish it through the process
# environment, which is where the later cells look it up (os.getenv / os.environ).
os.environ["GOOGLE_API_KEY"] = getpass.getpass()

# Install required packages
# !pip install oracledb openai google-generativeai

# Ensure Oracle Instant Client is installed
# Refer to Oracle's documentation for installation instructions specific to your OS.

In [None]:
import oracledb as cx_Oracle
import re
import os
import json
import logging
import difflib
#import openai  # Import the OpenAI module
# Import the Python SDK
import google.generativeai as genai
from openai import OpenAI

In [None]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Retrieve the Google Gemini API key from the environment.
# A missing key is only logged here; cells that need it will fail later.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    logging.error("Google Gemini API key not found. Please set the 'GOOGLE_API_KEY' environment variable.")

In [None]:
# The OpenAI SDK client is pointed at Google's generativelanguage endpoint (the
# `/openai/` path under v1beta) and is given the Google API key rather than an OpenAI key.
client = OpenAI(
    api_key=GOOGLE_API_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

In [None]:
# Database credentials
database_credentials = {
    "HERITAGE": {
        "host": "10.176.18.91",
        "port": 1522,
        "service_name": "HERITAGE19C",
        "username": "TQ_GIS",
        "password": "TQ_GIS"
    },
    "NEW_GEMINIA": {
        "host": "10.176.18.110",
        "port": 1521,
        "service_name": "NEW_GEMINIA",
        "username": "TQ_GIS",
        "password": "TQ_GIS"
    },
}

In [None]:
# Generate a markdown analysis report for one component diff through the chat-completions client
def generate_markdown_report(component_type, component_name, diff_file_path, report_dir='reports'):
    """
    Sends a component's unified diff to the LLM and saves the reply as a markdown report.

    The request goes through the module-level ``client`` using the
    ``gemini-1.5-flash`` model. The report is written to
    ``<report_dir>/<component_type>_<component_name>_report.md``.

    Args:
        component_type: Plural component kind (e.g. 'procedures'); the trailing
            character is dropped and the word capitalised for the prompt.
        component_name: Name of the component the diff belongs to.
        diff_file_path: Path to the unified diff text file (read as UTF-8).
        report_dir: Directory for the generated report; created if missing.

    Returns:
        True when the report was written. False when the diff could not be read,
        the LLM call failed, or the reply was empty; in those cases the tuple
        ``(component_type, component_name, diff_file_path)`` is appended to the
        module-level ``failed_reports`` list (created on demand).
    """
    logging.info(f"Generating markdown report for {component_type} '{component_name}'.")
    os.makedirs(report_dir, exist_ok=True)

    def _record_failure(error):
        # Log and remember the failure so it can be retried later. setdefault keeps this
        # from raising NameError when no comparison run has created the global list yet.
        logging.error(f"Failed to generate markdown report for {component_type} '{component_name}': {error}")
        globals().setdefault('failed_reports', []).append((component_type, component_name, diff_file_path))
        return False

    # A missing or undecodable diff is reported as an ordinary failure instead of
    # escaping as an exception, so callers looping over many components keep going.
    try:
        with open(diff_file_path, 'r', encoding='utf-8') as f:
            diff_content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        return _record_failure(e)

    # Construct the prompt
    prompt = f"""
You are an expert PL/SQL developer. Below is a unified diff of a {component_type[:-1].capitalize()} named '{component_name}' between two versions of a PL/SQL package. Analyze the changes and generate a detailed markdown report with the following sections:

- **Summary of Key Changes:**
    - *Reordering of Conditional Logic:*
        - **HERITAGE Version:**
            - [Description]
        - **NEW_GEMINIA Version:**
            - [Description]
    - *Modification of WHERE Clauses:*
        - **Removal and Addition of Conditions:**
            - [Description]
    - *Exception Handling Adjustments:*
        - **HERITAGE Version:**
            - [Description]
        - **NEW_GEMINIA Version:**
            - [Description]
    - *Formatting and Indentation:*
        - [Description]

- **Implications of the Changes:**
    - *Logic Alteration in Fee Determination:*
        - **Priority Shift:**
            - **HERITAGE:** [Description]
            - **NEW_GEMINIA:** [Description]
        - **Potential Outcome Difference:**
            - [Description]
    - *Business Rule Alignment:*
        - [Description]
    - *Impact on Clients:*
        - [Description]

- **Recommendations for Merging:**
    - *Review Business Requirements:*
        - **Confirm Intent:**
            - [Description]
    - *Consult Stakeholders:*
        - [Description]
    - *Test Thoroughly:*
        - **Create Test Cases:**
            - [Description]
        - **Validate Outcomes:**
            - [Description]
    - *Merge Strategy:*
        - **Conditional Merge:**
            - [Description]
        - **Maintain Backward Compatibility:**
            - [Description]
    - *Update Documentation:*
        - [Description]
    - *Code Quality Improvements:*
        - **Consistent Exception Handling:**
            - [Description]
        - **Clean Up Code:**
            - [Description]

- **Potential Actions Based on Analysis:**
    - **If the Change Aligns with Business Goals:**
        - [Description]
    - **If the Change Does Not Align:**
        - [Description]
    - **If Uncertain:**
        - [Description]

- **Additional Considerations:**
    - *Database Integrity:*
        - [Description]
    - *Performance Impact:*
        - [Description]
    - *Error Messages:*
        - [Description]

- **Conclusion:**
    - [Summary of the overall analysis and final thoughts.]

Below is the unified diff:

```diff
{diff_content}
```
    """

    try:
        response = client.chat.completions.create(
            model="gemini-1.5-flash",
            messages=[
                {"role": "system", "content": "You are an expert PL/SQL developer."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            #max_tokens=2000,
            n=1,
            #stop=None,
        )
        report = response.choices[0].message.content
        # An empty/None reply would otherwise leave a zero-byte report file behind.
        if not report:
            raise ValueError("the model returned an empty response")
        print("RESPONSE FROM LLMs", report)

        # Save the report to a markdown file
        report_file_path = os.path.join(report_dir, f"{component_type}_{component_name}_report.md")
        with open(report_file_path, 'w', encoding='utf-8') as f:
            f.write(report)
        print("WROTE REPORT", report)
        logging.info(f"Markdown report generated and saved to '{report_file_path}'.")
        return True  # Indicate success
    except Exception as e:
        # Save failed component information
        return _record_failure(e)  # Indicate failure

In [None]:
# Functions Definitions

def get_package_source(db_params, package_name, object_type='PACKAGE BODY'):
    """
    Fetches the stored source text of a PL/SQL object from ``all_source``.

    Args:
        db_params: Mapping with 'host', 'port', 'service_name', 'username' and 'password'.
        package_name: Object name; upper-cased before it is used in the lookup.
        object_type: Value matched against ``all_source.type`` (upper-cased),
            'PACKAGE BODY' by default.

    Returns:
        The source lines concatenated in ``line`` order, or an empty string when a
        database error occurs (the error is logged, not raised) or nothing matches.

    NOTE(review): the query does not filter on ``owner``; if the same object name
    exists in more than one visible schema their lines would be interleaved.
    """
    logging.info(f"Connecting to database {db_params['service_name']} to retrieve {object_type} '{package_name}'.")
    conn = None
    cursor = None
    source = ""
    try:
        dsn_tns = cx_Oracle.makedsn(
            db_params['host'],
            db_params['port'],
            service_name=db_params['service_name']
        )
        conn = cx_Oracle.connect(
            user=db_params['username'],
            password=db_params['password'],
            dsn=dsn_tns
        )
        cursor = conn.cursor()
        # Bind variables instead of string interpolation: the name/type can never be
        # interpreted as SQL, and quoting inside the values cannot break the statement.
        query = """
        SELECT text
        FROM all_source
        WHERE name = :pkg_name
        AND type = :obj_type
        ORDER BY line
        """
        cursor.execute(query, {"pkg_name": package_name.upper(), "obj_type": object_type.upper()})
        # Each row is a one-column tuple; guard against a NULL text column.
        source = ''.join(row[0] or '' for row in cursor.fetchall())
        logging.info(f"Retrieved {len(source)} characters of source code from {db_params['service_name']}.")
    except cx_Oracle.DatabaseError as e:
        logging.error(f"Database connection failed: {e}")
        source = ""
    finally:
        # Close whatever was actually opened; a failure closing the cursor must not
        # prevent the connection from being closed.
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as close_error:
                logging.debug(f"Ignoring error while closing database resource: {close_error}")
    return source

def parse_package_components(source_code):
    """
    Splits PL/SQL package source into named components using regular expressions.

    Args:
        source_code: Full text of the package (body) source.

    Returns:
        Dict with the keys 'procedures', 'functions', 'cursors', 'types' and
        'variables'; each maps a component name to the stripped text matched for it.
        A name that occurs more than once keeps its last match.

    Notes:
        * A procedure/function runs from its keyword up to the next standalone
          PROCEDURE/FUNCTION keyword (or end of text). The parameter list is optional,
          so parameterless subprograms are captured too, and the keywords are matched
          on word boundaries so identifiers such as ``v_function_code`` do not cut a
          body short.
        * This is a heuristic, not a parser: a keyword appearing as a whole word in a
          comment or string literal still ends the current component.
        * Cursors, types and variables are only searched in the text between the first
          IS/AS and the first BEGIN.
    """
    logging.info("Parsing package components.")
    components = {
        'procedures': {},
        'functions': {},
        'cursors': {},
        'types': {},
        'variables': {},
    }

    def _subprogram_pattern(keyword):
        # One pattern shape serves both PROCEDURE and FUNCTION.
        return re.compile(
            rf"""
            \b{keyword}\s+([\w$]+)\s*            # keyword and the subprogram name
            (?:\(.*?\))?\s*                      # optional parameter list, non-greedy
            (.*?)                                # remainder of the definition, lazily
            (?=\b(?:PROCEDURE|FUNCTION)\b|\Z)    # stop at the next subprogram keyword or end of text
            """,
            re.IGNORECASE | re.DOTALL | re.VERBOSE
        )

    def _collect(pattern, text, bucket, label):
        # Store every match of `pattern` under components[bucket], keyed by capture group 1.
        matches = list(pattern.finditer(text))
        logging.info(f"Found {len(matches)} {bucket}.")
        for match in matches:
            name = match.group(1)
            components[bucket][name] = match.group(0).strip()
            logging.debug(f"Parsed {label}: {name}")

    # Extract procedures and functions from the whole source
    _collect(_subprogram_pattern('PROCEDURE'), source_code, 'procedures', 'procedure')
    _collect(_subprogram_pattern('FUNCTION'), source_code, 'functions', 'function')

    # Extract the declaration section between IS/AS and BEGIN
    declaration_section_match = re.search(
        r'\b(IS|AS)\s+(.*?)\s+BEGIN\b', source_code, re.IGNORECASE | re.DOTALL
    )
    if declaration_section_match:
        declaration_section = declaration_section_match.group(2)

        # Patterns for cursors, types, and variables
        cursor_pattern = re.compile(
            r'CURSOR\s+([\w$]+)\s*(IS|AS)\s+(.*?);',
            re.IGNORECASE | re.DOTALL
        )
        type_pattern = re.compile(
            r'TYPE\s+([\w$]+)\s+(IS|AS)\s+(.*?);',
            re.IGNORECASE | re.DOTALL
        )
        variable_pattern = re.compile(
            r'(\w+)\s+(CONSTANT\s+)?[\w%\.]+(\([\d\s,]*\))?(\s+NOT\s+NULL)?\s*(:=\s*.*?|)\s*;',
            re.IGNORECASE | re.DOTALL
        )

        _collect(cursor_pattern, declaration_section, 'cursors', 'cursor')
        _collect(type_pattern, declaration_section, 'types', 'type')
        _collect(variable_pattern, declaration_section, 'variables', 'variable')

    logging.info("Finished parsing package components.")
    return components

def save_components_to_disk(components, package_name, base_directory='packages'):
    """
    Writes every parsed component to its own ``.sql`` file.

    Layout: ``<base_directory>/<package_name>/<component type>/<safe name>.sql``.
    A directory is created for every component type, even an empty one. In the file
    name, any character that is not alphanumeric, a space, ``_`` or ``-`` is replaced
    by ``_``.

    Args:
        components: Mapping of component type -> {name: definition text}.
        package_name: Folder name used under ``base_directory``.
        base_directory: Root output folder.
    """
    logging.info(f"Saving components of '{package_name}' to disk.")
    package_dir = os.path.join(base_directory, package_name)
    os.makedirs(package_dir, exist_ok=True)

    def _fs_safe(raw_name):
        # Clean the name to be file-system friendly
        return ''.join(ch if (ch.isalnum() or ch in ' _-') else '_' for ch in raw_name)

    written = 0
    for kind, entries in components.items():
        kind_dir = os.path.join(package_dir, kind)
        os.makedirs(kind_dir, exist_ok=True)
        for raw_name, body in entries.items():
            target = os.path.join(kind_dir, f"{_fs_safe(raw_name)}.sql")
            with open(target, 'w', encoding='utf-8') as handle:
                handle.write(body)
            written += 1
    logging.info(f"Saved {written} components of '{package_name}' to '{package_dir}'.")

def save_components_as_json(components, package_name, base_directory='packages'):
    """
    Dumps the whole component mapping to one JSON file.

    The file is ``<base_directory>/<package_name>/<package_name>_components.json``,
    written as UTF-8 with a four-space indent; the folder is created when missing.
    """
    logging.info(f"Saving components of '{package_name}' as JSON.")
    target_dir = os.path.join(base_directory, package_name)
    os.makedirs(target_dir, exist_ok=True)

    json_path = os.path.join(target_dir, f"{package_name}_components.json")
    serialized = json.dumps(components, indent=4)
    with open(json_path, 'w', encoding='utf-8') as handle:
        handle.write(serialized)
    logging.info(f"Components saved as JSON to '{json_path}'.")

def compare_components(components1, components2, package_name):
    """
    Compares two parsed component mappings and writes a unified diff per changed component.

    For every component type in ``components1`` the names are classified as added
    (only in ``components2``), removed (only in ``components1``) or modified (present
    in both with different stripped line content). Each modified component gets a diff
    file ``diffs/<package_name>/<type>_<name>_diff.txt`` whose from/to labels use the
    HERITAGE and NEW_GEMINIA naming.

    Args:
        components1: Mapping of type -> {name: text} for the first (HERITAGE) side.
        components2: Same structure for the second (NEW_GEMINIA) side. A type missing
            here is treated as empty, so all of its names count as removed.
        package_name: Used for the output folder and the diff labels.

    Returns:
        Dict of type -> {'added': [...], 'removed': [...], 'modified': [...]} with each
        list sorted by name, so repeated runs produce identical output.
    """
    logging.info("Comparing components with detailed diffs.")
    differences = {}
    diffs_output_dir = os.path.join('diffs', package_name)
    os.makedirs(diffs_output_dir, exist_ok=True)

    for comp_type, first in components1.items():
        second = components2.get(comp_type, {})
        names1, names2 = set(first), set(second)

        modified = []
        # Sorted iteration keeps both the result lists and the log output deterministic.
        for common in sorted(names1 & names2):
            content1 = first[common].strip().splitlines()
            content2 = second[common].strip().splitlines()
            if content1 == content2:
                continue
            modified.append(common)
            # Generate diff
            diff = difflib.unified_diff(
                content1, content2,
                fromfile=f'{package_name}_HERITAGE_{comp_type}_{common}.sql',
                tofile=f'{package_name}_NEW_GEMINIA_{comp_type}_{common}.sql',
                lineterm=''
            )
            # Save diff to file
            diff_file_path = os.path.join(diffs_output_dir, f'{comp_type}_{common}_diff.txt')
            with open(diff_file_path, 'w', encoding='utf-8') as f:
                f.write('\n'.join(diff))
            logging.debug(f"Diff for {comp_type} '{common}' saved to '{diff_file_path}'.")

        differences[comp_type] = {
            'added': sorted(names2 - names1),
            'removed': sorted(names1 - names2),
            'modified': modified
        }
    logging.info("Finished comparing components with diffs.")
    return differences
    

In [None]:
def compare_plsql_packages(package_name, log_level='INFO'):
    """
    Runs the full comparison for one package body across the two configured databases.

    Steps: reset the module-level ``failed_reports`` list, set the root logger level,
    fetch the PACKAGE BODY from HERITAGE and NEW_GEMINIA, parse both, save the parsed
    components (files and JSON), diff them, store ``diffs/<package>/differences.json``,
    print a summary and request a markdown report for every modified component.
    Failed report requests are written to ``reports/failed_reports.json``.

    Args:
        package_name: Name of the package to compare.
        log_level: Name of a ``logging`` level; an unknown name falls back to INFO.

    Returns:
        None. The function returns early if either source comes back empty.
    """
    global failed_reports  # Declare as global to modify within the function
    failed_reports = []  # List to keep track of failed report generations

    # Set logging level
    level = getattr(logging, log_level.upper(), None)
    if not isinstance(level, int):
        logging.warning(f"Invalid log level: {log_level}. Defaulting to INFO.")
        level = logging.INFO
    logging.getLogger().setLevel(level)

    logging.info(f"Starting comparison for package '{package_name}'.")

    # Both sides go through the same steps, always HERITAGE first.
    labels = ('HERITAGE', 'NEW_GEMINIA')

    # Get the package body source code from both databases
    logging.info("Retrieving package sources.")
    sources = {
        label: get_package_source(database_credentials[label], package_name, 'PACKAGE BODY')
        for label in labels
    }
    for label in labels:
        if not sources[label]:
            logging.error(f"Failed to retrieve PACKAGE BODY from {label} for package '{package_name}'.")
            return

    # Parse components from package body
    parsed = {}
    for label in labels:
        logging.info(f"Parsing package components from {label}.")
        parsed[label] = parse_package_components(sources[label])

    # Save components to disk
    logging.info("Saving components to disk.")
    for label in labels:
        save_components_to_disk(parsed[label], package_name + f'_{label}_BODY')

    # Optionally, save as JSON
    logging.info("Saving components as JSON.")
    for label in labels:
        save_components_as_json(parsed[label], package_name + f'_{label}_BODY')

    # Compare packages with detailed diffs
    differences = compare_components(parsed['HERITAGE'], parsed['NEW_GEMINIA'], package_name)

    # Save differences to a JSON file for later use
    differences_file = os.path.join('diffs', package_name, 'differences.json')
    with open(differences_file, 'w', encoding='utf-8') as f:
        json.dump(differences, f, indent=4)
    logging.info(f"Differences saved to '{differences_file}'.")

    # Output differences with summaries and generate markdown reports
    logging.info("Outputting differences with summaries and generating markdown reports.")
    summary_captions = (
        ('added', "  Added in NEW_GEMINIA: "),
        ('removed', "  Removed from NEW_GEMINIA: "),
        ('modified', "  Modified: "),
    )
    for comp_type, diff in differences.items():
        print(f"\nDifferences in {comp_type}:")
        for key, caption in summary_captions:
            if diff[key]:
                print(f"{caption}{diff[key]}")
        for name in diff['modified']:
            diff_file_path = os.path.join('diffs', package_name, f'{comp_type}_{name}_diff.txt')
            print(f"    - Diff for {name} saved at: {diff_file_path}")
            # Generate the markdown report through the LLM
            if not generate_markdown_report(comp_type, name, diff_file_path):
                logging.error(f"Report generation failed for {comp_type} '{name}'.")

    # Check if any report generations failed
    if failed_reports:
        logging.error("Some report generations failed. You can retry generating reports for these components using the 'retry_failed_reports' function.")
        # Save failed reports info to a JSON file
        failed_reports_file = os.path.join('reports', 'failed_reports.json')
        with open(failed_reports_file, 'w', encoding='utf-8') as f:
            json.dump(failed_reports, f, indent=4)
        logging.info(f"Failed report details saved to '{failed_reports_file}'.")

    logging.info(f"Finished comparison for package '{package_name}'.")

In [None]:
def retry_failed_reports():
    """
    Retries generating markdown reports for components that previously failed.

    Reads the module-level ``failed_reports`` list of
    ``(component_type, name, diff_file_path)`` entries, calls
    ``generate_markdown_report`` once for each, and replaces the list with the
    entries that failed again. ``reports/failed_reports.json`` is rewritten when
    failures remain and deleted when none do. Does nothing (apart from logging) when
    there is no list or it is empty.
    """
    global failed_reports
    # Snapshot first: generate_markdown_report appends to the global list on failure,
    # so iterating the live list would never finish while a component keeps failing.
    # globals().get also covers the case where no comparison has defined the list yet.
    pending = list(globals().get('failed_reports') or [])
    if not pending:
        logging.info("No failed reports to retry.")
        return

    logging.info("Retrying failed markdown report generations.")
    failed_reports = []
    successful_retries = []
    remaining_failures = []

    for comp_type, name, diff_file_path in pending:
        success = generate_markdown_report(comp_type, name, diff_file_path)
        if success:
            successful_retries.append((comp_type, name))
        else:
            remaining_failures.append((comp_type, name, diff_file_path))

    if successful_retries:
        logging.info(f"Successfully retried and generated reports for: {successful_retries}")

    failed_reports_file = os.path.join('reports', 'failed_reports.json')
    if remaining_failures:
        logging.error(f"Still failed to generate reports for: {[(ct, n) for ct, n, _ in remaining_failures]}")
        # Update the failed_reports global variable (one entry per component, no duplicates
        # from the appends done inside generate_markdown_report)
        failed_reports = remaining_failures
        # Update the failed reports JSON file
        os.makedirs(os.path.dirname(failed_reports_file), exist_ok=True)
        with open(failed_reports_file, 'w', encoding='utf-8') as f:
            json.dump(failed_reports, f, indent=4)
    else:
        logging.info("All failed reports have been successfully generated.")
        failed_reports = []  # Clear the failed reports list
        # Remove the failed reports JSON file
        if os.path.exists(failed_reports_file):
            os.remove(failed_reports_file)

In [None]:
# Replace 'YOUR_PACKAGE_NAME' with the actual package name and set desired log level
# compare_plsql_packages(package_name='GIN_STP_PKG', log_level='DEBUG')

In [None]:
# If there were any failures in report generation, you can retry them:
# retry_failed_reports()

In [None]:
# Configure logging
# NOTE: logging.basicConfig does nothing when the root logger already has handlers, so
# if the earlier logging-configuration cell has run, this call does not switch the
# level to CRITICAL. It only takes effect when this is the first configuration call.
logging.basicConfig(level=logging.CRITICAL, format='%(asctime)s - %(levelname)s - %(message)s')

# Retrieve the Google Gemini API key from the environment.
# A missing key is only logged here; cells that need it will fail later.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    logging.error("Google Gemini API key not found. Please set the 'GOOGLE_API_KEY' environment variable.")

In [None]:

import google.generativeai as genai

# Configure the google.generativeai SDK with the API key read in the previous cell
genai.configure(api_key=GOOGLE_API_KEY)

# Optional: Set generation configuration (modify as needed)
generation_config = {
    "temperature": 0.2,  # Controls randomness (1=more random, 0=less random)
    "top_p": 0.95,    # Probability distribution weighting
    "max_output_tokens": 8192, 
}

# Module-level GenerativeModel instance with the settings above.
# NOTE(review): the merge helper defined in a later cell builds its own model with
# different settings, so among the cells visible here nothing reads this `model`.
# The system instruction is a single space, i.e. effectively blank.
model = genai.GenerativeModel(
    # model_name="gemini-2.0-flash-exp",
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
    system_instruction=" ",
)

# Print confirmation message (printed unconditionally; no request has been sent yet)
print("Gemini model configured successfully!")

In [None]:
import os
import logging
import google.generativeai as genai

def merge_database_package_procedures_with_history(
    differences_file_path: str,
    heritage_package_procedure: str,
    geminia_package_procedure: str,
    output_package_path: str,
    max_rounds: int = 20,
) -> str:
    """
    Merges two versions of a procedure/function with Gemini in a multi-turn chat.

    The prompt (diff plus both versions) is sent as the first chat message. While a
    reply does not contain the completion marker 'FINAL_MERGING_DONE', the model is
    asked to continue in the same chat session, which carries the conversation
    history. The replies are joined with blank lines and written to the output file.

    Args:
        differences_file_path: Path to the file containing differences between the procedures.
        heritage_package_procedure: Path to the heritage procedure file.
        geminia_package_procedure: Path to the geminia procedure file.
        output_package_path: Path to save the merged procedure.
        max_rounds: Upper bound on the number of model replies requested before giving
            up, so a model that never emits the marker cannot loop forever.

    Returns:
        Path of the saved merged procedure.

    Raises:
        KeyError: If the GOOGLE_API_KEY environment variable is not set.
        RuntimeError: If reading the inputs, calling the model or writing the output
            fails, or the marker is not produced within ``max_rounds`` replies. The
            original exception is attached as the cause.
    """

    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

    generation_config = {
        "temperature": 1,
        "top_p": 0.95,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }

    model = genai.GenerativeModel(
        model_name="gemini-2.0-flash-exp",
        generation_config=generation_config,
        system_instruction="You are a senior PL/SQL developer proficient in merging procedures and functions. Indicate completion with the word 'FINAL_MERGING_DONE' at a new line at the bottom of the last code.",
    )

    # Detection and removal share one case-insensitive pattern, so a marker written in
    # a different case is both recognised and stripped.
    completion_marker = re.compile(r"FINAL_MERGING_DONE", re.IGNORECASE)

    try:
        # The inputs are written as UTF-8 by the save/diff helpers, so read them the same way.
        with open(differences_file_path, "r", encoding="utf-8") as diff_file:
            diff_content = diff_file.read()

        with open(heritage_package_procedure, "r", encoding="utf-8") as heritage_file:
            heritage_content = heritage_file.read()

        with open(geminia_package_procedure, "r", encoding="utf-8") as geminia_file:
            geminia_content = geminia_file.read()

        base_prompt = f"""
You are an expert PL/SQL developer. Your task is to merge two plsql procedures into a single harmonized procedure.
The differences between the two procedures are stored in here (you will only use this for context, dont paste it directly):
{diff_content}
The first procedure is here: Heritage Procedure:
{heritage_content}
The second procedure that will be used for comparison and merging is here: Geminia Procedure:
{geminia_content}
When the final output is done, write a new line below the final code saying "FINAL_MERGING_DONE" at the end of your response.
        """

        merged_chunks = []
        # One chat session for the whole exchange: the SDK records each message and
        # reply, so there is no need to rebuild the history by hand every round.
        chat_session = model.start_chat()
        next_message = base_prompt

        for _ in range(max_rounds):
            response = chat_session.send_message(next_message)
            chunk = response.text.strip()

            if completion_marker.search(chunk):
                merged_chunks.append(completion_marker.sub("", chunk).strip())
                break

            merged_chunks.append(chunk)
            next_message = "Continue."
        else:
            raise RuntimeError(
                f"model did not emit 'FINAL_MERGING_DONE' within {max_rounds} rounds"
            )

        merged_content = "\n\n".join(merged_chunks)
        output_dir = os.path.dirname(output_package_path) or "."
        os.makedirs(output_dir, exist_ok=True)

        with open(output_package_path, "w", encoding="utf-8") as output_file:
            output_file.write(merged_content)

        logging.info(f"Merged package written to {output_package_path}")
        return output_package_path

    except Exception as e:
        logging.error(f"LLM Error: {e}")
        raise RuntimeError(f"Failed to merge using LLM: {e}") from e


In [None]:
import os
import json
import logging

def merge_all_database_procedures_and_functions(package_name, differences_file, heritage_path, geminia_path, output_dir):
    """
    Iterates through procedures/functions in the differences JSON file and merges them dynamically.

    Only the 'procedures' and 'functions' sections are processed. For each name listed
    under 'modified' or 'added', the diff file and both saved source files must exist;
    otherwise the component is skipped with a warning. A component whose merge raises
    is logged and skipped as well.

    Args:
        package_name (str): Name of the package being processed.
        differences_file (str): Path to the JSON file containing differences for all components.
        heritage_path (str): Path to the Heritage package directory.
        geminia_path (str): Path to the Geminia package directory.
        output_dir (str): Directory to save the merged files.

    Returns:
        list: List of paths to the merged files.

    Raises:
        FileNotFoundError: If ``differences_file`` does not exist.
    """
    # Load differences JSON file
    if not os.path.exists(differences_file):
        raise FileNotFoundError(f"Differences file not found: {differences_file}")

    with open(differences_file, 'r', encoding='utf-8') as file:
        differences = json.load(file)

    def _saved_file_name(component_name):
        # Same sanitising rule save_components_to_disk applies when it writes the
        # per-component files (e.g. '$' becomes '_'); without it such components are
        # never found on disk and are silently skipped.
        safe = ''.join(c if c.isalnum() or c in ' _-' else '_' for c in component_name)
        return f"{safe}.sql"

    merged_files = []

    for comp_type, diff_data in differences.items():
        # Process only procedures and functions
        if comp_type not in ['procedures', 'functions']:
            continue

        logging.info(f"Processing {comp_type}...")
        for name in diff_data.get('modified', []) + diff_data.get('added', []):
            # Define paths for this procedure/function. The diff file keeps the raw
            # component name (that is how compare_components names it).
            diff_file_path = os.path.join('diffs', package_name, f"{comp_type}_{name}_diff.txt")
            heritage_file_path = os.path.join(heritage_path, comp_type, _saved_file_name(name))
            geminia_file_path = os.path.join(geminia_path, comp_type, _saved_file_name(name))
            output_file_path = os.path.join(output_dir, f"merged_{name}.sql")

            # Check if required files exist
            if not os.path.exists(diff_file_path):
                logging.warning(f"Diff file not found: {diff_file_path}. Skipping {name}.")
                continue
            if not os.path.exists(heritage_file_path):
                logging.warning(f"Heritage procedure/function file not found: {heritage_file_path}. Skipping {name}.")
                continue
            if not os.path.exists(geminia_file_path):
                logging.warning(f"Geminia procedure/function file not found: {geminia_file_path}. Skipping {name}.")
                continue

            # Merge the procedure/function
            try:
                merged_file = merge_database_package_procedures_with_history(
                    differences_file_path=diff_file_path,
                    heritage_package_procedure=heritage_file_path,
                    geminia_package_procedure=geminia_file_path,
                    output_package_path=output_file_path
                )
                merged_files.append(merged_file)
                logging.info(f"Merged file created: {merged_file}")
            except Exception as e:
                logging.error(f"Failed to merge {comp_type} '{name}': {e}")

    return merged_files

In [None]:
# Merge every modified/added procedure and function of one package.
# The two source folders are the ones the comparison step writes under `packages/`,
# so they are derived from the package name instead of being spelled out twice.
package_name = "GIN_STP_PKG"
differences_file = os.path.join("diffs", package_name, "differences.json")
merged_files = merge_all_database_procedures_and_functions(
    package_name,
    differences_file,
    f"packages/{package_name}_HERITAGE_BODY",
    f"packages/{package_name}_NEW_GEMINIA_BODY",
    "output",
)


print(f"Merged files created at: {merged_files}")


In [None]:
import os
import re
import logging

def clean_sql_code_blocks(file_path: str) -> str:
    """
    Removes a markdown code fence wrapped around the content of a SQL file.

    An opening fence at the very start of the file (optionally preceded by
    whitespace) is removed whatever its language tag: ```sql, ```plsql or a bare ```.
    A closing ``` at the very end is removed too. Fences elsewhere in the file are
    left untouched. The file is rewritten in place only when something changed.

    Args:
        file_path (str): Path to the SQL file to clean

    Returns:
        str: Path to the cleaned file (same as input)

    Raises:
        Exception: Any error from reading or writing the file is logged and re-raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            original = file.read()

        # Opening fence with any (or no) language tag. Handling only "sql" would leave
        # e.g. "```plsql" in place while its closing fence is still stripped below.
        cleaned = re.sub(r'^\s*```[\w-]*\s*\n?', '', original)

        # Remove ``` from the end
        cleaned = re.sub(r'\n?\s*```\s*$', '', cleaned)

        # Write back the cleaned content
        if cleaned != original:
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(cleaned)

        logging.info(f"Cleaned code blocks from: {file_path}")
        return file_path

    except Exception as e:
        logging.error(f"Error cleaning file {file_path}: {e}")
        raise

def clean_all_sql_files_in_directory(directory_path: str) -> list:
    """
    Walks a directory tree and cleans every ``.sql`` file found in it.

    Each file is passed to ``clean_sql_code_blocks``. A file whose cleaning raises is
    logged and left out of the result; the walk continues with the remaining files.

    Args:
        directory_path (str): Path to the directory containing SQL files

    Returns:
        list: Paths of the files that were cleaned without error, in walk order
    """
    succeeded = []

    for folder, _subdirs, filenames in os.walk(directory_path):
        sql_paths = [os.path.join(folder, fname) for fname in filenames if fname.endswith('.sql')]
        for sql_path in sql_paths:
            try:
                clean_sql_code_blocks(sql_path)
            except Exception as e:
                logging.error(f"Failed to clean {sql_path}: {e}")
            else:
                succeeded.append(sql_path)

    return succeeded

def clean_merged_files(merged_files_list: list) -> list:
    """
    Runs ``clean_sql_code_blocks`` over an explicit list of files.

    A file whose cleaning raises is logged and omitted from the result; the other
    files are still processed.

    Args:
        merged_files_list (list): List of file paths to clean

    Returns:
        list: The paths that were cleaned without error, in input order
    """
    def _cleaned_ok(path) -> bool:
        # True when the file was cleaned; False (after logging) when cleaning failed.
        try:
            clean_sql_code_blocks(path)
        except Exception as e:
            logging.error(f"Failed to clean {path}: {e}")
            return False
        return True

    return [path for path in merged_files_list if _cleaned_ok(path)]


In [None]:
import os
import json
import logging
import re
from datetime import datetime

def create_package_header(package_name: str) -> str:
    """
    Builds the opening text of the consolidated package file.

    The result is a comment banner (package name, generation time formatted as
    ``YYYY-MM-DD HH:MM:SS`` from the local clock, description) followed by a blank
    line, the ``CREATE OR REPLACE PACKAGE BODY <name> AS`` statement and another
    blank line.

    Args:
        package_name (str): Name of the package

    Returns:
        str: Package header content
    """
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    rule = "-- ====================================================="

    header_lines = [
        rule,
        f"-- Package: {package_name}",
        f"-- Generated: {generated_at}",
        "-- Description: Merged procedures and functions from heritage and geminia versions",
        rule,
        "",
        f"CREATE OR REPLACE PACKAGE BODY {package_name} AS",
        "",
    ]
    # The trailing "\n" reproduces the blank line that ends the header.
    return "\n".join(header_lines) + "\n"

def create_package_footer() -> str:
    """
    Builds the closing text of the consolidated package file.

    Returns:
        str: A newline, the ``END;`` line and the ``/`` terminator line
             (no trailing newline).
    """
    closing_lines = ("", "END;", "/")
    return "\n".join(closing_lines)

def clean_sql_content(content: str) -> str:
    """
    Cleans SQL content by removing Markdown code fences, the
    FINAL_MERGING_DONE marker and surrounding whitespace.

    The text is stripped first so that an opening fence preceded by blank
    lines or spaces is still recognised. Trailing artefacts (closing fence
    and FINAL_MERGING_DONE marker) are removed repeatedly until none is
    left, so they are handled in whichever order they appear.

    Args:
        content (str): Raw SQL content

    Returns:
        str: Cleaned SQL content with no leading/trailing whitespace
    """
    # Opening fence: "```sql", "```plsql" or a bare "```" (case insensitive).
    opening_fence = re.compile(r'^```(?:plsql|sql)?\s*', re.IGNORECASE)
    closing_fence = re.compile(r'\s*```\s*$')
    done_marker = re.compile(r'\s*FINAL_MERGING_DONE\s*$', re.IGNORECASE)

    content = content.strip()
    content = opening_fence.sub('', content, count=1)

    # Peel trailing artefacts until the text stops changing; a single fixed
    # order would leave a fence behind when the marker follows it.
    while True:
        trimmed = closing_fence.sub('', done_marker.sub('', content))
        if trimmed == content:
            break
        content = trimmed

    # Clean up extra whitespace at the edges but preserve inner SQL formatting
    return content.strip()

def consolidate_merged_procedures_and_functions(
    package_name: str,
    merged_files_dir: str,
    output_dir: str = None
) -> str:
    """
    Consolidates all merged procedures and functions into a single package file.

    Collects every file in merged_files_dir whose name starts with
    'merged_' and ends with '.sql' (non-recursive), then hands the sorted
    list to consolidate_from_file_list, which builds and writes the
    package body. Validation happens before anything is written, so no
    output directory is created when there is nothing to consolidate.

    Args:
        package_name (str): Name of the original package (e.g., "GIN_STP_PKG")
        merged_files_dir (str): Directory containing merged SQL files
        output_dir (str, optional): Directory to save the consolidated package.
                                   Defaults to same as merged_files_dir

    Returns:
        str: Path to the consolidated package file

    Raises:
        FileNotFoundError: If merged_files_dir is not a directory or holds
                           no matching merged files
    """
    if output_dir is None:
        output_dir = merged_files_dir

    # Gather the merged SQL files; sorting gives a stable component order
    merged_files = []
    if os.path.isdir(merged_files_dir):
        merged_files = sorted(
            os.path.join(merged_files_dir, entry)
            for entry in os.listdir(merged_files_dir)
            if entry.startswith('merged_') and entry.endswith('.sql')
        )

    if not merged_files:
        raise FileNotFoundError(f"No merged files found in {merged_files_dir}")

    # Assembly and writing are shared with the list-based entry point so
    # the two code paths cannot drift apart.
    return consolidate_from_file_list(package_name, merged_files, output_dir)

# Matches the declaration line of a stored procedure or function, with an
# optional CREATE [OR REPLACE] [EDITIONABLE | NONEDITIONABLE] prefix.
_COMPONENT_DECLARATION_RE = re.compile(
    r'^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?'
    r'(PROCEDURE|FUNCTION)\b',
    re.IGNORECASE | re.MULTILINE,
)


def _component_name_from_path(file_path: str) -> str:
    """Returns the name in a 'merged_<name>.sql' path, removing only the prefix and suffix."""
    name = os.path.basename(file_path)
    if name.endswith('.sql'):
        name = name[:-len('.sql')]
    if name.startswith('merged_'):
        name = name[len('merged_'):]
    return name


def _classify_component(sql_text: str) -> str:
    """Returns 'PROCEDURE' or 'FUNCTION' based on the first declaration line, or '' if unknown."""
    declaration = _COMPONENT_DECLARATION_RE.search(sql_text)
    if declaration:
        return declaration.group(1).upper()

    # No line starts with a declaration keyword: fall back to a plain
    # keyword search so such content is still counted.
    upper_text = sql_text.upper()
    if 'PROCEDURE' in upper_text:
        return 'PROCEDURE'
    if 'FUNCTION' in upper_text:
        return 'FUNCTION'
    return ''


def consolidate_from_file_list(
    package_name: str,
    merged_files_list: list,
    output_dir: str
) -> str:
    """
    Consolidates procedures and functions from a list of file paths.

    Files are processed in sorted path order. Each file is read as UTF-8,
    cleaned with clean_sql_content, indented by two spaces and appended
    under a comment banner carrying the component name. Missing,
    unreadable or empty files are logged/skipped rather than aborting the
    run. The result is wrapped in the package header/footer and written
    to '<output_dir>/<package_name>_MERGED.sql'.

    Args:
        package_name (str): Name of the original package
        merged_files_list (list): List of paths to merged SQL files
        output_dir (str): Directory to save the consolidated package

    Returns:
        str: Path to the consolidated package file

    Raises:
        ValueError: If merged_files_list is empty
    """
    # Validate before touching the filesystem so bad input has no side effects
    if not merged_files_list:
        raise ValueError("No merged files provided")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    consolidated_path = os.path.join(output_dir, f"{package_name}_MERGED.sql")

    # Collect the pieces in a list and join once at the end
    parts = [create_package_header(package_name)]
    counts = {'PROCEDURE': 0, 'FUNCTION': 0}
    banner_rule = f"  -- {'-' * 50}\n"
    components_added = 0

    for file_path in sorted(merged_files_list):
        if not os.path.exists(file_path):
            logging.warning(f"File not found: {file_path}")
            continue

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                cleaned_content = clean_sql_content(file.read())
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole package
            logging.error(f"Error processing file {file_path}: {e}")
            continue

        if not cleaned_content.strip():
            continue

        component_name = _component_name_from_path(file_path)

        # Comment banner identifying the component
        parts.append("\n" + banner_rule)
        parts.append(f"  -- {component_name.upper()}\n")
        parts.append(banner_rule + "\n")

        # Indent non-blank lines by two spaces; blank lines stay untouched
        indented_content = '\n'.join(
            '  ' + line if line.strip() else line
            for line in cleaned_content.split('\n')
        )
        parts.append(indented_content + "\n\n")

        # Count by declaration keyword, not by any mention in the body
        kind = _classify_component(cleaned_content)
        if kind:
            counts[kind] += 1

        components_added += 1
        logging.info(f"Added {component_name} to consolidated package")

    if components_added == 0:
        logging.warning(f"No components were added to package {package_name}; the package body is empty")

    # Add the package footer
    parts.append(create_package_footer())

    # Write the consolidated package
    with open(consolidated_path, 'w', encoding='utf-8') as output_file:
        output_file.write(''.join(parts))

    logging.info(f"Consolidated package created: {consolidated_path}")
    logging.info(f"Total components: {counts['PROCEDURE']} procedures, {counts['FUNCTION']} functions")

    return consolidated_path

    

In [None]:

# Build the consolidated package body for GIN_STP_PKG from the merged_*.sql
# files in "output" and write it under "consolidated_packages".
package_name = "GIN_STP_PKG"
consolidated_package = consolidate_merged_procedures_and_functions(
    package_name,
    "output",
    output_dir="consolidated_packages",
)
print(f"Consolidated package created: {consolidated_package}")

