In [1]:
import getpass
import os

# Prompt for API key
#LLAMA_CLOUD_API_KEY = getpass("Enter your LLAMA CLOUD API key: ")
#ANTHROPIC_API_KEY = getpass("Enter your Antropic API key: ")
os.environ["GOOGLE_API_KEY"] = getpass.getpass()

# Install required packages
#!pip install cx_Oracle openai

# Ensure Oracle Instant Client is installed
# Refer to Oracle's documentation for installation instructions specific to your OS.


 ········


In [2]:
import cx_Oracle
import re
import os
import json
import logging
import difflib
#import openai  # Import the OpenAI module
# Import the Python SDK
import google.generativeai as genai
from openai import OpenAI

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Retrieve OpenAI API key from environment variable
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    logging.error("Google Gemini API key not found. Please set the 'GOOGLE_API_KEY' environment variable.")
else:
    GOOGLE_API_KEY = GOOGLE_API_KEY

In [4]:
client = OpenAI(
    api_key=GOOGLE_API_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

In [5]:
# New function to compare components using GPT-4
def compare_components_gpt4(components1, components2, package_name):
    """
    Compares components from two PL/SQL packages using GPT-4 and generates detailed comparison reports.

    Parameters:
    - components1: Dictionary of components from the first package (e.g., HERITAGE)
    - components2: Dictionary of components from the second package (e.g., NEW_GEMINIA)
    - package_name: Name of the package being compared

    Returns:
    - differences: A dictionary containing added, removed, and modified components for each component type.
    """
    logging.info("Comparing components using GPT-4.")
    differences = {}
    reports_output_dir = os.path.join('gpt_reports', package_name)
    os.makedirs(reports_output_dir, exist_ok=True)

    for comp_type in components1.keys():
        set1 = set(components1[comp_type].keys())
        set2 = set(components2[comp_type].keys())

        added = set2 - set1
        removed = set1 - set2
        modified = set()

        for common in set1 & set2:
            content1 = components1[comp_type][common].strip()
            content2 = components2[comp_type][common].strip()
            if content1 != content2:
                modified.add(common)
                # Generate comparison report using GPT-4
                report = compare_plsql_procedures_gpt4(common, content1, content2, comp_type, package_name)
                if report:
                    # Save the report to a markdown file
                    report_file_path = os.path.join(reports_output_dir, f"{comp_type}_{common}_comparison_report.md")
                    with open(report_file_path, 'w', encoding='utf-8') as f:
                        f.write(report)
                    logging.info(f"Comparison report for {comp_type} '{common}' saved to '{report_file_path}'.")
                else:
                    logging.error(f"Failed to generate comparison report for {comp_type} '{common}'.")

        differences[comp_type] = {
            'added': list(added),
            'removed': list(removed),
            'modified': list(modified)
        }
    logging.info("Finished comparing components using GPT-4.")
    return differences

def compare_plsql_procedures_gpt4(proc_name, proc_code1, proc_code2, comp_type, package_name):
    """
    Compares two PL/SQL components (procedures, functions, etc.) using GPT-4 and returns a formatted report.

    Parameters:
    - proc_name: Name of the component
    - proc_code1: Code of the component from the first package
    - proc_code2: Code of the component from the second package
    - comp_type: Type of the component (e.g., 'procedures', 'functions')
    - package_name: Name of the package being compared

    Returns:
    - report: A string containing the formatted comparison report
    """
    # Construct the prompt
    prompt = f"""
You are an expert PL/SQL developer tasked with comparing two versions of a PL/SQL procedure 
and generating a detailed report highlighting the differences. 
Your primary goal is to ensure accuracy in identifying changes, 
especially in business logic, to prevent regression issues and facilitate easy merging of the two versions.

Here are the details of the two procedure versions you need to compare:

Procedure Name: <proc_name>{{proc_name}}</proc_name>
Package Name: <package_name>{{package_name}}</package_name>

Version 1 ({package_name}_HERITAGE):
```sql
{proc_code1}
```

Version 2 ({package_name}_NEW_GEMINIA):
```sql
{proc_code2}
```


Your task is to analyze these two versions and produce a detailed comparison report. Follow these steps:

1. Carefully examine both versions of the procedure.
2. Identify all differences between the two versions.
3. Categorize the differences into the following areas:
   - Changes in Logic
   - Changes in Parameters
   - Changes in SQL Statements
   - Changes in Exception Handling
   - Other Changes
4. For each identified difference, consider its potential impact on the procedure's functionality and performance.
5. Provide clear, actionable insights that would help an intern merge these two procedures effectively.

Before writing your final report, break down your comparison process in <comparison_process> tags. This will help ensure a thorough and accurate analysis. In this section:

1. List the main sections of each procedure side by side.
2. Identify and quote specific differences in each section.
3. Categorize each difference according to the provided categories.
4. Consider potential impacts of each difference.

Pay special attention to changes in business logic, as these are critical for preventing regression issues.

After your analysis, provide your final report in Markdown format with the following structure:

1. ## Introduction
   - Brief overview of the comparison task
   - Names of the procedures being compared

2. ## Summary of Differences
   - ### Changes in Logic
   - ### Changes in Parameters
   - ### Changes in SQL Statements
   - ### Changes in Exception Handling
   - ### Other Changes

3. ## Detailed Comparison
   - For each significant difference:
     - Explain what changed
     - Discuss potential implications
     - Provide guidance for merging (if applicable)

4. ## Conclusion
   - Overall assessment of the changes
   - Recommendations for merging the procedures

Ensure your report is clear, concise, and focused on helping the team understand the differences and merge the procedures effectively.
"""

    try:
        response = client.chat.completions.create(
                    model="gemini-1.5-flash",
                    messages=[
                        {"role": "system", "content": "You are an expert PL/SQL developer."},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=0.3,
                    #max_tokens=2000,
                    n=1,
                    #stop=None,
        )
        print("RESPONSE FROM LLMs", response.choices[0].message.content)
        report = response.choices[0].message.content
        return report
    except Exception as e:
        logging.error(f"Error during Geminia API call for component '{proc_name}': {e}")
        return None

In [6]:
def load_components_from_directory(base_dir, package_name, package_variant):
    """
    Loads PL/SQL components from the specified package variant directory.

    Parameters:
    - base_dir: Base directory where package directories are located (e.g., 'packages')
    - package_variant: Variant of the package ('HERITAGE_BODY' or 'GEMINIA_BODY')

    Returns:
    - components: A dictionary with component types as keys and dictionaries of component names and codes as values.
    """
    components = {
        'procedures': {},
        'functions': {},
        'cursors': {},
        'types': {},
        'variables': {},
    }

    # Construct the full path to the package variant directory
    package_dir = os.path.join(base_dir, f'{package_name}_{package_variant}')
    
    if not os.path.isdir(package_dir):
        logging.error(f"Directory '{package_dir}' does not exist.")
        return components

    logging.info(f"Loading components from '{package_dir}'.")

    for comp_type in components.keys():
        comp_type_dir = os.path.join(package_dir, comp_type)
        if not os.path.isdir(comp_type_dir):
            logging.warning(f"Component type directory '{comp_type_dir}' does not exist. Skipping.")
            continue

        for filename in os.listdir(comp_type_dir):
            if filename.endswith('.sql'):
                comp_name = os.path.splitext(filename)[0].upper()  # Normalize to uppercase
                file_path = os.path.join(comp_type_dir, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as file:
                        code = file.read().strip()
                        components[comp_type][comp_name] = code
                        logging.debug(f"Loaded {comp_type[:-1].capitalize()} '{comp_name}' from '{file_path}'.")
                except Exception as e:
                    logging.error(f"Failed to read file '{file_path}': {e}")

    logging.info(f"Loaded components from '{package_variant}'.")
    return components


In [7]:
def main():
    # Define base directory and package variants
    base_directory = 'packages2'
    heritage_variant = 'HERITAGE_BODY'
    geminia_variant = 'NEW_GEMINIA_BODY'
    package_name = 'GIN_STP_PKG'  # Replace with your actual package name

    # Load components from HERITAGE and GEMINIA variants
    logging.info("Starting to load HERITAGE components.")
    heritage_components = load_components_from_directory(base_directory, package_name, heritage_variant)
    
    logging.info("Starting to load GEMINIA components.")
    geminia_components = load_components_from_directory(base_directory, package_name, geminia_variant)

    # Proceed only if components are loaded
    if not any(heritage_components.values()):
        logging.error("No HERITAGE components loaded. Aborting comparison.")
        return
    if not any(geminia_components.values()):
        logging.error("No GEMINIA components loaded. Aborting comparison.")
        return

    # Compare the components using GPT-4
    logging.info("Starting comparison of components using GPT-4.")
    differences = compare_components_gpt4(heritage_components, geminia_components, package_name)

    # Output the differences
    logging.info("Comparison completed. Summary of differences:")
    for comp_type, diff in differences.items():
        logging.info(f"\nComponent Type: {comp_type.capitalize()}")
        logging.info(f"Added: {diff['added']}")
        logging.info(f"Removed: {diff['removed']}")
        logging.info(f"Modified: {diff['modified']}")

    # Optionally, you can save or process the 'differences' dictionary as needed



In [8]:
if __name__ == "__main__":
    main()

2024-12-05 16:59:03,677 - INFO - Starting to load HERITAGE components.
2024-12-05 16:59:03,679 - INFO - Loading components from 'packages2\GIN_STP_PKG_HERITAGE_BODY'.
2024-12-05 16:59:03,689 - INFO - Loaded components from 'HERITAGE_BODY'.
2024-12-05 16:59:03,690 - INFO - Starting to load GEMINIA components.
2024-12-05 16:59:03,691 - INFO - Loading components from 'packages2\GIN_STP_PKG_NEW_GEMINIA_BODY'.
2024-12-05 16:59:03,699 - INFO - Loaded components from 'NEW_GEMINIA_BODY'.
2024-12-05 16:59:03,700 - INFO - Starting comparison of components using GPT-4.
2024-12-05 16:59:03,701 - INFO - Comparing components using GPT-4.
2024-12-05 16:59:09,322 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/openai/chat/completions "HTTP/1.1 429 Too Many Requests"
2024-12-05 16:59:09,326 - INFO - Retrying request to /chat/completions in 0.470569 seconds
2024-12-05 16:59:12,396 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/openai/chat/comple

RESPONSE FROM LLMs <comparison_process>

**Comparison of `GIN_STP_PKG_HERITAGE.populate_endos_details` and `GIN_STP_PKG_NEW_GEMINIA.populate_endos_details`**

We will compare the procedures section by section, noting differences and categorizing them.

**1. Procedure Header:**

* **GIN_STP_PKG_HERITAGE:**
```sql
PROCEDURE populate_endos_details (
   v_pol_no             IN       VARCHAR2,
   ...
   v_regional_endors    IN       VARCHAR2
)
```
* **GIN_STP_PKG_NEW_GEMINIA:**
```sql
PROCEDURE populate_endos_details (
        v_pol_no             IN     VARCHAR2,
        ...
        v_regional_endors    IN     VARCHAR2)
```
* **Difference:** The `IN` keyword spacing is different.  This is a minor stylistic difference.  **Category: Other Changes**.  **Impact:** Negligible.

**2. Variable Declarations:**

* **GIN_STP_PKG_HERITAGE:**
```sql
   v_ipu_covt_sht_desc     VARCHAR2 (10);
   ...
   v_pro_change_uw_on_ex   VARCHAR2 (2);
```
* **GIN_STP_PKG_NEW_GEMINIA:**
```sql
        v_ipu_covt_sht

2024-12-05 17:00:33,020 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/openai/chat/completions "HTTP/1.1 200 OK"
2024-12-05 17:00:33,058 - INFO - Comparison report for procedures 'GIN_RSK_LIMITS_STP' saved to 'gpt_reports\GIN_STP_PKG\procedures_GIN_RSK_LIMITS_STP_comparison_report.md'.


RESPONSE FROM LLMs <comparison_process>

**Comparison of `gin_rsk_limits_stp` Procedures (GIN_STP_PKG_HERITAGE vs. GIN_STP_PKG_NEW_GEMINIA)**

We will compare the procedures section by section, noting differences and categorizing them.

| Section             | GIN_STP_PKG_HERITAGE                                                                                                                                                                                                                                                                                          | GIN_STP_PKG_NEW_GEMINIA                                                                                                                                                                                                                                                                                                                      | Difference Category          | Potential Impact                                                     

2024-12-05 17:00:46,201 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/openai/chat/completions "HTTP/1.1 200 OK"
2024-12-05 17:00:46,266 - INFO - Comparison report for procedures 'UPDATE_COMM_DETAILS' saved to 'gpt_reports\GIN_STP_PKG\procedures_UPDATE_COMM_DETAILS_comparison_report.md'.


RESPONSE FROM LLMs <comparison_process>

**Procedure Comparison: `GIN_STP_PKG_HERITAGE.update_comm_details` vs. `GIN_STP_PKG_NEW_GEMINIA.update_comm_details`**

We will compare the procedures section by section:

| Section          | GIN_STP_PKG_HERITAGE.update_comm_details                                                                                                                                                           | GIN_STP_PKG_NEW_GEMINIA.update_comm_details                                                                                                                                                                                                 | Difference                                                                                                                                                              | Category             | Potential Impact                                                                                                                          

2024-12-05 17:01:50,786 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/openai/chat/completions "HTTP/1.1 200 OK"
2024-12-05 17:01:50,811 - INFO - Comparison report for procedures 'POP_SINGLE_TAXES' saved to 'gpt_reports\GIN_STP_PKG\procedures_POP_SINGLE_TAXES_comparison_report.md'.


RESPONSE FROM LLMs <comparison_process>

**Comparison of `GIN_STP_PKG_HERITAGE.pop_single_taxes` and `GIN_STP_PKG_NEW_GEMINIA.pop_single_taxes`**

We will compare the procedures section by section:

| Section             | GIN_STP_PKG_HERITAGE.pop_single_taxes                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

2024-12-05 17:02:07,850 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/openai/chat/completions "HTTP/1.1 200 OK"
2024-12-05 17:02:07,912 - INFO - Comparison report for procedures 'POPULATE_RENEWALS' saved to 'gpt_reports\GIN_STP_PKG\procedures_POPULATE_RENEWALS_comparison_report.md'.


RESPONSE FROM LLMs <comparison_process>

**Comparison of `GIN_STP_PKG_HERITAGE.populate_renewals` and `GIN_STP_PKG_NEW_GEMINIA.populate_renewals`**

We'll compare the procedures section by section, noting differences and categorizing them.

**1. Procedure Header:**

* **GIN_STP_PKG_HERITAGE:** `PROCEDURE populate_renewals (v_pol_data IN web_pol_tab, v_user VARCHAR2)`
* **GIN_STP_PKG_NEW_GEMINIA:** `PROCEDURE populate_renewals (v_trans_id IN NUMBER, v_user VARCHAR2)`

**Difference:**  The input parameter `v_pol_data` (type `web_pol_tab`) in Version 1 is replaced by `v_trans_id` (type `NUMBER`) in Version 2.  This is a **Change in Parameters** and a significant **Change in Logic**, as it alters the fundamental way the procedure receives input data.  Version 1 likely uses a collection of policy data, while Version 2 uses a transaction ID to retrieve the necessary data.  This requires careful consideration during merging, potentially involving changes to how the procedure retrieves and pro

2024-12-05 17:02:22,442 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/openai/chat/completions "HTTP/1.1 200 OK"
2024-12-05 17:02:22,469 - INFO - Comparison report for procedures 'TRANSFER_TO_UW' saved to 'gpt_reports\GIN_STP_PKG\procedures_TRANSFER_TO_UW_comparison_report.md'.


RESPONSE FROM LLMs <comparison_process>

**Comparison of `GIN_STP_PKG_HERITAGE.transfer_to_uw` and `GIN_STP_PKG_NEW_GEMINIA.transfer_to_uw`**

We will compare the procedures section by section, noting differences and categorizing them.

**1. Procedure Header:**

* **GIN_STP_PKG_HERITAGE:** `PROCEDURE transfer_to_uw (v_pol_batch_no   IN       NUMBER, v_user           IN       VARCHAR2, v_batch_no       OUT      NUMBER)`
* **GIN_STP_PKG_NEW_GEMINIA:** `PROCEDURE transfer_to_uw (v_pol_batch_no   IN     NUMBER, v_user           IN     VARCHAR2, v_batch_no          OUT NUMBER)`

**Difference:** Minor formatting difference in spacing.  **Category:** Other Changes. **Impact:** Negligible.

**2. Variable Declarations:**

* **GIN_STP_PKG_HERITAGE:** Declares variables `v_pol_no`, `v_ends_no`, ..., `v_agents_status`.
* **GIN_STP_PKG_NEW_GEMINIA:** Declares the same variables plus `v_blacklist_status            VARCHAR2 (400) := 'N';`

**Difference:** Addition of `v_blacklist_status`. **Category:

2024-12-05 17:02:38,704 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/openai/chat/completions "HTTP/1.1 200 OK"
2024-12-05 17:02:38,758 - INFO - Comparison report for procedures 'UPDATE_MANDATORY_SECTIONS' saved to 'gpt_reports\GIN_STP_PKG\procedures_UPDATE_MANDATORY_SECTIONS_comparison_report.md'.
2024-12-05 17:02:38,761 - INFO - Finished comparing components using GPT-4.
2024-12-05 17:02:38,765 - INFO - Comparison completed. Summary of differences:
2024-12-05 17:02:38,766 - INFO - 
Component Type: Procedures
2024-12-05 17:02:38,768 - INFO - Added: ['GIN_REN_RSK_LIMITS', 'GIN_IPU_PRC', 'GIN_POLICIES_STP_PRC', 'GIN_COINSURERS_PRC', 'GIN_REN_COINSURERS_PRC', 'GIN_RSK_STP_LIMITS', 'GIN_IPU_STP_PRC090216', 'GIN_RSK_LIMITS', 'GIN_IPU_STP_PRC']
2024-12-05 17:02:38,771 - INFO - Removed: []
2024-12-05 17:02:38,772 - INFO - Modified: ['POPULATE_ENDOS_DETAILS', 'GIN_RSK_LIMITS_STP', 'UPDATE_COMM_DETAILS', 'POP_SINGLE_TAXES', 'POPULATE_RENEWALS', 'TRANSFER_TO_UW', 

RESPONSE FROM LLMs <comparison_process>

**Comparison of `GIN_STP_PKG.update_mandatory_sections` Versions**

We will compare the two versions of the procedure `GIN_STP_PKG.update_mandatory_sections` side-by-side, section by section.

| Section             | Version 1 (GIN_STP_PKG_HERITAGE)                                                                                                                                                                                                                                                           | Version 2 (GIN_STP_PKG_NEW_GEMINIA)                                                                                                                                                                                                                                                        | Difference                                                                                                                                                                  