In [None]:
# Establish a connection with an LLM using Groq API key and the `dspy` library 

# pip install dspy-ai python-dotenv
import os
import dspy
from dotenv import load_dotenv

# Pull settings such as GROQ_API_KEY in from the local .env file
load_dotenv()

# Fail fast when the Groq credential is missing or empty
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise ValueError("GROQ_API_KEY not found in environment variables")

# Handle to the Llama 3.3 70B model served through Groq
llm = dspy.LM(model="groq/llama-3.3-70b-versatile", api_key=groq_api_key)

# Register that handle as the default LM for every DSPy module below
dspy.settings.configure(lm=llm)


# Define a simple DSPy Signature for question answering:
# one input field (`question`) and one output field (`answer`).
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""
    # NOTE(review): DSPy presumably feeds the docstring above to the model as
    # the task instruction — confirm before rewording it.

    # Input: the question text supplied by the caller.
    question = dspy.InputField()
    # Output: the model's answer; `desc` carries a hint about expected length.
    answer = dspy.OutputField(desc="often between 5 and 7 words")

# Pass the signature to the ChainOfThought module to build a callable predictor
generate_answer = dspy.ChainOfThought(BasicQA)

# Call the predictor on a particular input; the keyword name matches the
# `question` field declared on BasicQA
question = "What is the color of the sky?"
pred = generate_answer(question=question)

# Show the input next to the `answer` field of the returned prediction
print(f"Question: {question}")
print(f"Predicted Answer: {pred.answer}")

In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Sample Maven dependencies used to exercise the lookup below.
# Each entry is expanded from a (group_id, artifact_id, homepage) triple.
dependencies = [
    {"group_id": group_id, "artifact_id": artifact_id, "homepage": homepage}
    for group_id, artifact_id, homepage in (
        ("com.google.guava", "guava", "https://github.com/google/guava"),
        ("com.squareup.okhttp3", "okhttp", "https://github.com/square/okhttp"),
        ("org.apache.commons", "commons-lang3", "https://commons.apache.org/proper/commons-lang/"),
        ("org.hibernate", "hibernate-core", "https://hibernate.org/orm/"),
        ("org.flywaydb", "flyway-core", "https://flywaydb.org/"),
        ("org.bouncycastle", "bcprov-jdk15on", "https://www.bouncycastle.org/"),
    )
]

# Repository-relative paths probed, in order, when looking for a changelog
# in a GitHub-hosted project
CHANGELOG_FILENAMES = [
    "CHANGELOG.md",
    "CHANGELOG.txt",
    "docs/changelog.md",
    "HISTORY.md",
    "RELEASE-NOTES.md",
]

def _find_github_changelog(repo_path, branches=("main", "master"), timeout=5):
    """Return the URL of the first changelog file that exists in a GitHub repo, else None."""
    for branch in branches:
        for filename in CHANGELOG_FILENAMES:
            changelog_url = f"https://github.com/{repo_path}/blob/{branch}/{filename}"
            try:
                response = requests.head(changelog_url, timeout=timeout)
            except requests.RequestException:
                # Network trouble: stop probing rather than waiting out a
                # timeout for every remaining candidate.
                print(f"[❌] Failed to fetch {changelog_url}")
                return None
            if response.status_code == 200:
                return changelog_url
    return None


def _match_links_by_keyword(links, keyword_map):
    """Map each category to the first link whose text contains its highest-priority keyword."""
    matches = {}
    for category, keywords in keyword_map.items():
        for keyword in keywords:
            hit = next((url for text, url in links.items() if keyword in text), None)
            if hit is not None:
                matches[category] = hit
                break  # earlier keywords win; do not let later ones overwrite
    return matches


def fetch_dependency_insights(dep):
    """
    Collect and print useful links (security, releases, migration, changelog)
    for one Maven dependency.

    GitHub-hosted projects get links derived from the repository path, plus a
    changelog link when one of CHANGELOG_FILENAMES exists on the `main` or
    `master` branch. Any other homepage is downloaded and its anchor texts are
    matched against per-category keywords. When no release or security link
    was found, the Maven Central directory for the artifact is added as a
    fallback.

    Args:
        dep (dict): Must contain the keys "group_id", "artifact_id" and
            "homepage".

    Returns:
        dict: Category name -> URL for everything that was found. The same
        entries are printed to stdout.

    Raises:
        KeyError: If `dep` lacks one of the required keys.
    """
    dep_name = dep["artifact_id"]
    homepage_url = dep["homepage"]
    group_id = dep["group_id"]
    print(f"\n--- Fetching insights for {dep_name} ---")
    print(f"🌐 Scraping official site: {homepage_url}")

    insights = {}

    # GitHub-based projects
    if "github.com" in homepage_url:
        # Everything after the host is "<owner>/<repo>"; splitting on the host
        # also copes with http:// and www. variants of the URL.
        repo_path = homepage_url.split("github.com", 1)[1].strip("/")
        insights["SECURITY"] = f"https://github.com/{repo_path}/security"
        insights["RELEASE"] = f"https://github.com/{repo_path}/releases"
        insights["MIGRATION"] = f"https://github.com/{repo_path}/wiki/Migration-Guide"

        changelog_url = _find_github_changelog(repo_path)
        if changelog_url:
            insights["CHANGELOG"] = changelog_url

    else:
        # General scraping for non-GitHub dependencies
        try:
            response = requests.get(homepage_url, timeout=5)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, "html.parser")
                # Lower-cased anchor text -> absolute URL (later duplicates of
                # the same text replace earlier ones).
                links = {
                    a.text.strip().lower(): urljoin(homepage_url, a["href"])
                    for a in soup.find_all("a", href=True)
                }

                # Keywords are listed in priority order within each category.
                keyword_map = {
                    "SECURITY": ["security", "vulnerabilities", "cve"],
                    "RELEASE": ["release", "changelog", "changes"],
                    "MIGRATION": ["migration", "upgrade"]
                }
                insights.update(_match_links_by_keyword(links, keyword_map))

        except requests.RequestException:
            print(f"[❌] Failed to fetch {homepage_url}")

    # Fallback: point at the artifact's Maven Central directory when either
    # the release or the security link is missing.
    if not insights.get("RELEASE") or not insights.get("SECURITY"):
        fallback_maven_url = f"https://repo1.maven.org/maven2/{group_id.replace('.', '/')}/{dep_name}/"
        print(f"[🔄] Fallback: Checking Maven Repository → {fallback_maven_url}")
        insights["MAVEN_REPO"] = fallback_maven_url

    if dep_name.startswith("bcprov"):  # Special case for BouncyCastle
        insights["DOCS"] = "https://www.bouncycastle.org/documentation.html"

    # Print results
    for category, link in insights.items():
        print(f"[{category.upper()}] {link}")

    return insights

# Run for all test dependencies; each call prints its own findings
for dep in dependencies:
    fetch_dependency_insights(dep)

# Marker so the end of the run is easy to spot in the cell output
print("\n--- Done Testing ---")


In [36]:
# Fetch pom.xml from GitHub repository using Personal Access Token

import os
import requests
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def fetch_github_file(owner: str, repo: str, path: str, branch: str = "main") -> str:
    """
    Fetch a file from a GitHub repository using a Personal Access Token.

    The token is read from the ``GITHUB_TOKEN`` environment variable, falling
    back to ``GIT_TOKEN``; it is never embedded in the source.

    Args:
        owner (str): GitHub repository owner/organization
        repo (str): Repository name
        path (str): Path to file in the repository
        branch (str): Branch name (default: main)

    Returns:
        str: Content of the file

    Raises:
        ValueError: If no token is configured in the environment.
        Exception: If GitHub answers with a non-200 status.
        requests.RequestException: On network failure or timeout.
    """
    # SECURITY: a token used to be hardcoded here. Anything committed that way
    # stays in version history and notebook copies, so that token must be
    # revoked and replaced.
    token = os.getenv("GITHUB_TOKEN") or os.getenv("GIT_TOKEN")
    if not token:
        raise ValueError("GITHUB_TOKEN not found in environment variables or .env file")

    # Construct raw content URL
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}"

    # Set up headers with token
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.v3.raw"
    }

    # Bounded wait so a stalled connection cannot hang the notebook
    response = requests.get(url, headers=headers, timeout=10)

    if response.status_code == 200:
        return response.text
    raise Exception(f"Failed to fetch file. Status: {response.status_code}\nMessage: {response.text}")

# Example usage: download a pom.xml and echo it, reporting any failure
try:
    # Replace with actual repository details
    pom_content = fetch_github_file(owner="techneo1", repo="AI-Engineering", path="pom.xml", branch="master")

    # Alternative repository:
    # pom_content = fetch_github_file(owner="vinitdadyala", repo="adu-ai-agent", path="pom.xml", branch="master")

    print("Successfully fetched pom.xml:")
    print(f"{pom_content}...")

    # Optionally save to file
    # with open("downloaded_pom.xml", "w") as f:
    #     f.write(pom_content)

except Exception as err:
    print(f"Error: {str(err)}")

Successfully fetched pom.xml:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.example</groupId>
    <artifactId>dependency-analyzer</artifactId>
    <version>1.0.0</version>

    <dependencies>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>32.0.0-jre</version>
        </dependency>

        <dependency>
            <groupId>com.squareup.okhttp3</groupId>
            <artifactId>okhttp</artifactId>
            <version>4.11.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.13.0</version>
        </dependency>

        <dependency>
            <gr

In [4]:
import os

def find_java_files(base_dir):
    """Walk `base_dir` recursively and return the paths of all files whose names end in ".java".

    Paths are built by joining each directory reported by `os.walk` with the
    file name, in the order `os.walk` yields them.
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(base_dir)
        for filename in filenames
        if filename.endswith(".java")
    ]

# Demo: list every Java source file under a sample project directory.
if __name__ == "__main__":
    # NOTE(review): hardcoded absolute Windows path — this only works on a
    # machine that has this directory; consider making it configurable.
    base_dir = r"C:\tmp\test\HelloWorldSample"
    java_files = find_java_files(base_dir)
    # Print one matching path per line
    for jf in java_files:
        print(jf)

C:\tmp\test\HelloWorldSample\src\main\java\com\example\StringUtilsExample.java
