In [1]:
# Establish a connection with an LLM using Groq API key and the `dspy` library 

# pip install dspy-ai python-dotenv
import os
import dspy
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Configure DSPy to use Groq with Llama3.3
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise ValueError("GROQ_API_KEY not found in environment variables")

# Set up the Groq LLM with Llama3.3
llm = dspy.LM(
    model="groq/llama-3.3-70b-versatile",
    api_key=groq_api_key,  # Using Llama3.3 70B model
)

# Set the LLM as the default for DSPy
dspy.settings.configure(lm=llm)


# Define a simple DSPy Signature for question answering
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 5 and 7 words")

# Pass the signature to the ChainOfThought module
generate_answer = dspy.ChainOfThought(BasicQA)

# Call the predictor on a particular input
question = "What is the color of the sky?"
pred = generate_answer(question=question)

print(f"Question: {question}")
print(f"Predicted Answer: {pred.answer}")

  from .autonotebook import tqdm as notebook_tqdm


Question: What is the color of the sky?
Predicted Answer: Blue on a sunny day


In [9]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Test dependencies
dependencies = [
    {"group_id": "com.google.guava", "artifact_id": "guava"},
    {"group_id": "com.squareup.okhttp3", "artifact_id": "okhttp"},
    {"group_id": "org.apache.commons", "artifact_id": "commons-lang3"},
    {"group_id": "org.hibernate", "artifact_id": "hibernate-core"},
    {"group_id": "org.flywaydb", "artifact_id": "flyway-core"},
    {"group_id": "org.bouncycastle", "artifact_id": "bcprov-jdk15on"}
]

def get_homepage_from_maven(group_id, artifact_id):
    """Fetch correct homepage URL from Maven Search API or fallback to MvnRepository."""
    search_url = f"https://search.maven.org/solrsearch/select?q=g:%22{group_id}%22+AND+a:%22{artifact_id}%22&rows=1&wt=json"
    
    try:
        response = requests.get(search_url, timeout=5)
        data = response.json()
        
        if "response" in data and data["response"]["docs"]:
            doc = data["response"]["docs"][0]
            homepage_url = doc.get("homepage", None)
            if homepage_url:
                return homepage_url  # ✅ Found real homepage from API
        
    except requests.RequestException as e:
        print(f"[❌] Error fetching from Maven API: {e}")

    # Fallback: Scrape MvnRepository for homepage
    mvn_repo_url = f"https://mvnrepository.com/artifact/{group_id}/{artifact_id}"
    try:
        response = requests.get(mvn_repo_url, timeout=5)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            homepage_link = soup.find("a", text="Homepage")
            if homepage_link:
                return urljoin(mvn_repo_url, homepage_link["href"])  # ✅ Scraped real homepage

    except requests.RequestException:
        pass  # Ignore scraping failure

    return None  # ❌ No homepage found

def fetch_dependency_insights(dep):
    """Fetch insights for a given dependency."""
    dep_name = dep["artifact_id"]
    group_id = dep["group_id"]
    
    homepage_url = get_homepage_from_maven(group_id, dep_name)
    if not homepage_url:
        print(f"[⚠️] No homepage found for {dep_name}. Skipping...")
        return
    
    print(f"\n--- Fetching insights for {dep_name} ---")
    print(f"🌐 Scraping official site: {homepage_url}")

    insights = {}

    # Scrape homepage content
    try:
        response = requests.get(homepage_url, timeout=5)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            text_content = "\n".join([p.get_text(strip=True) for p in soup.find_all("p")][:5])
            insights["SCRAPED_CONTENT"] = text_content if text_content else "[No readable content found]"
        else:
            insights["ERROR"] = f"Failed to fetch page, status code: {response.status_code}"
    
    except requests.RequestException as e:
        insights["ERROR"] = f"Request failed: {e}"

    # Print extracted content
    for category, content in insights.items():
        print(f"\n[{category.upper()}]\n{content}\n")

# Run for all test dependencies
for dep in dependencies:
    fetch_dependency_insights(dep)

print("\n--- Done Testing ---")


[⚠️] No homepage found for guava. Skipping...
[⚠️] No homepage found for okhttp. Skipping...
[⚠️] No homepage found for commons-lang3. Skipping...
[⚠️] No homepage found for hibernate-core. Skipping...
[⚠️] No homepage found for flyway-core. Skipping...
[⚠️] No homepage found for bcprov-jdk15on. Skipping...

--- Done Testing ---


In [4]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Test dependency (only the first one)
dependency = {
    "group_id": "com.google.guava",
    "artifact_id": "guava",
    "homepage": "https://github.com/google/guava"
}

def fetch_dependency_insights(dep):
    dep_name = dep["artifact_id"]
    homepage_url = dep["homepage"]
    print(f"\n--- Fetching insights for {dep_name} ---")
    print(f"🌐 Scraping official site: {homepage_url}")

    insights = {}

    # Fetch and scrape webpage content
    try:
        response = requests.get(homepage_url, timeout=5)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract main content (limit text length for readability)
            text_content = "\n".join([p.get_text(strip=True) for p in soup.find_all("p")][:5])

            insights["SCRAPED_CONTENT"] = text_content if text_content else "[No readable content found]"
        else:
            insights["ERROR"] = f"Failed to fetch page, status code: {response.status_code}"
    
    except requests.RequestException as e:
        insights["ERROR"] = f"Request failed: {e}"

    # Print extracted content
    for category, content in insights.items():
        print(f"\n[{category.upper()}]\n{content}\n")

# Run for the first test dependency
fetch_dependency_insights(dependency)

print("\n--- Done Testing ---")



--- Fetching insights for guava ---
🌐 Scraping official site: https://github.com/google/guava

[SCRAPED_CONTENT]
We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see ourdocumentation.
Google core libraries for Java

Guava is a set of core Java libraries from Google that includes new collection
types (such as multimap and multiset), immutable collections, a graph library,
and utilities for concurrency, I/O, hashing, primitives, strings, and more! It
is widely used on most Java projects within Google, and widely used by many
other companies as well.


--- Done Testing ---
