In [7]:
!pip install --quiet openai huggingface_hub pandas matplotlib



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import os
import pandas as pd
import matplotlib.pyplot as plt


from openai import OpenAI
from openai import AuthenticationError, RateLimitError, APIConnectionError, BadRequestError


from huggingface_hub import InferenceClient

# ---------------------------------------------------------------
# Backend selection: "openai" or "hf" (Hugging Face)
# ---------------------------------------------------------------
PROVIDER = "openai"  # OpenAI is the default backend

# ---------------------------------------------------------------
# Model used by each backend
# ---------------------------------------------------------------
OPENAI_MODEL = "gpt-4o-mini"
HF_MODEL = "gpt2"  # public default; can be swapped for another model later

# ---------------------------------------------------------------
# API keys: a value already present in the environment is kept;
# otherwise the placeholder string is registered under that name.
# ---------------------------------------------------------------
for _env_name, _placeholder in (
    ("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
    ("HF_TOKEN", "YOUR_HF_TOKEN"),
):
    os.environ.setdefault(_env_name, _placeholder)




In [9]:
# Both clients start out unset; the branches below populate at most one of them.
openai_client = None
hf_client = None


def _real_secret(env_var, placeholder):
    """Return the value of `env_var`, or None when it is unset, blank,
    or still equal to the template `placeholder` string."""
    value = (os.getenv(env_var) or "").strip()
    return value if value and value != placeholder else None


if PROVIDER == "openai":
    try:
        # A placeholder key would let the client build without error and only
        # fail on the first request, so the fallback below would never run.
        # Treat it as missing and fail here instead.
        openai_key = _real_secret("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
        if openai_key is None:
            raise ValueError("OPENAI_API_KEY is missing or still set to the placeholder value")
        openai_client = OpenAI(api_key=openai_key)

        print("OpenAI client ready.")
    except Exception as e:
        print("OpenAI init failed. Falling back to Hugging Face.\n", repr(e))
        PROVIDER = "hf"

if PROVIDER == "hf":
    try:
        # Only pass a token when a real one is configured; otherwise build the
        # client without a token instead of sending the placeholder string.
        tok = _real_secret("HF_TOKEN", "YOUR_HF_TOKEN")
        hf_client = InferenceClient(HF_MODEL, token=tok) if tok else InferenceClient(HF_MODEL)
        print(f"Hugging Face client ready ({HF_MODEL}).")
    except Exception as e:
        print("HF init failed. Please check HF model name/token.\n", repr(e))


OpenAI client ready.


In [None]:
def generate_text(
    prompt: str,
    provider: "str | None" = None,
    temperature: float = 0.7,
    top_p: float = 1.0,
    max_tokens: int = 120,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
):
    """
    Return model text for `prompt` using either OpenAI or Hugging Face.

    Parameters
    ----------
    prompt : user prompt sent to the model.
    provider : "openai" selects the OpenAI chat path; any other value selects
        the Hugging Face path. None (the default) means "use the current value
        of the module-level PROVIDER", looked up at call time so that changing
        PROVIDER in a later cell takes effect without re-running this cell.
    temperature, top_p, max_tokens : sampling controls forwarded to the backend
        (max_tokens is sent as max_new_tokens on the Hugging Face path).
    frequency_penalty, presence_penalty : forwarded on the OpenAI path only.

    Returns
    -------
    str
        The generated text. Failures are not raised: they are reported as a
        bracketed message string such as "[OpenAI auth error: ...]".
    """
    if provider is None:
        provider = PROVIDER

    if provider == "openai":
        if openai_client is None:
            return "[OpenAI not initialized. Provide a valid OPENAI_API_KEY or switch PROVIDER='hf']"
        try:
            resp = openai_client.chat.completions.create(
                model=OPENAI_MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant. Keep answers concise."},
                    {"role": "user", "content": prompt}
                ],
                temperature=temperature,
                top_p=top_p,
                max_tokens=max_tokens,
                frequency_penalty=frequency_penalty,
                presence_penalty=presence_penalty,
            )
            # The message content may be None; return an empty string in that
            # case instead of failing on `.strip()`.
            content = resp.choices[0].message.content
            return (content or "").strip()
        except AuthenticationError:
            return "[OpenAI auth error: set OPENAI_API_KEY or ask your lead to add theirs]"
        except RateLimitError:
            return "[OpenAI rate/quota error: insufficient credits or rate limit hit]"
        except (APIConnectionError, BadRequestError) as e:
            return f"[OpenAI API error: {e}]"
        except Exception as e:
            return f"[OpenAI unexpected error: {repr(e)}]"

    # Hugging Face path (free)
    if hf_client is None:
        return "[HF not initialized. Set PROVIDER='hf' or install huggingface_hub]"
    try:
        # HF uses max_new_tokens; map from max_tokens
        hf_kwargs = {"max_new_tokens": max_tokens}
        if temperature > 0:
            hf_kwargs["temperature"] = temperature
            # Text-generation backends commonly require 0 < top_p < 1.
            # top_p >= 1 means "no nucleus filtering", which is what omitting
            # the argument gives, so only forward values inside that range.
            if 0 < top_p < 1:
                hf_kwargs["top_p"] = top_p
        else:
            # temperature <= 0 is commonly rejected by these backends; request
            # greedy decoding instead of sending an invalid temperature.
            hf_kwargs["do_sample"] = False
        # Add return_full_text=False here for models that echo the prompt,
        # if only the newly generated text is wanted.
        out = hf_client.text_generation(prompt, **hf_kwargs)
        # text_generation may return string or dict depending on backend; normalize
        return out if isinstance(out, str) else str(out)
    except Exception as e:
        return f"[HF error: {repr(e)}]"


In [None]:
# Shared prompt reused by every sweep in the cells that follow.
PROMPT = (
    "Write a short (4-5 sentences) explainer on why "
    "'temperature' changes an LLM's writing style."
)

# Reference generation at the baseline sampling settings.
baseline_settings = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 120}
baseline = generate_text(PROMPT, **baseline_settings)
print("=== Baseline ===\n", baseline)


In [None]:
# Temperature sweep: vary temperature only, with top_p and max_tokens fixed.
temps = [0.0, 0.3, 0.7, 1.0]

# One generation per temperature, in sweep order.
temp_outputs = [
    generate_text(PROMPT, temperature=temp, top_p=1.0, max_tokens=120)
    for temp in temps
]

# One record per run: label, generated text, and its character count.
rows = [
    {"Experiment": f"temperature={temp}", "Output": generated, "Length(chars)": len(generated)}
    for temp, generated in zip(temps, temp_outputs)
]

df_temp = pd.DataFrame(rows)
df_temp


In [11]:
# Top-p sweep: vary top_p only, with temperature and max_tokens fixed.
tops = [0.3, 0.7, 1.0]
rows = []
for nucleus in tops:
    completion = generate_text(PROMPT, temperature=0.7, top_p=nucleus, max_tokens=120)
    record = {
        "Experiment": f"top_p={nucleus}",
        "Output": completion,
        "Length(chars)": len(completion),
    }
    rows.append(record)

df_topp = pd.DataFrame(rows)
df_topp


NameError: name 'PROMPT' is not defined

In [12]:
# Max-token sweep: vary the generation budget only, with sampling fixed.
lengths = [50, 100, 150]


def _max_tokens_row(token_budget):
    """Run one generation capped at `token_budget` tokens and return its table row."""
    generated = generate_text(PROMPT, temperature=0.7, top_p=0.95, max_tokens=token_budget)
    return {
        "Experiment": f"max_tokens={token_budget}",
        "Output": generated,
        "Length(chars)": len(generated),
    }


rows = list(map(_max_tokens_row, lengths))

df_maxtok = pd.DataFrame(rows)
df_maxtok


NameError: name 'PROMPT' is not defined

In [13]:
# Penalty sweeps run on the OpenAI path only; otherwise a one-row stub
# table is produced in their place.
rows = []
if PROVIDER != "openai":
    df_penalties = pd.DataFrame([{"Experiment": "penalties", "Output": "[Penalties not available on HF path]", "Length(chars)": 0}])
else:
    # Sweep frequency_penalty first, then presence_penalty; the penalty that is
    # not being swept is left at generate_text's default.
    for penalty_name in ("frequency_penalty", "presence_penalty"):
        for penalty_value in (0.0, 0.5, 1.0):
            generated = generate_text(
                PROMPT,
                temperature=0.7,
                top_p=0.95,
                max_tokens=120,
                **{penalty_name: penalty_value},
            )
            rows.append({
                "Experiment": f"{penalty_name}={penalty_value}",
                "Output": generated,
                "Length(chars)": len(generated),
            })

    df_penalties = pd.DataFrame(rows)

df_penalties


NameError: name 'PROMPT' is not defined

In [14]:
# Combine available experiment tables
frames = [("Temperature", df_temp), ("Top-p", df_topp), ("Max Tokens", df_maxtok)]
if PROVIDER == "openai":
    frames.append(("Penalties", df_penalties))

# Plot one chart per group showing output length (as a rough proxy for verbosity)
for title, frame in frames:
    plt.figure(figsize=(8, 4))
    plt.bar(range(len(frame)), frame["Length(chars)"])
    plt.xticks(range(len(frame)), frame["Experiment"], rotation=30, ha="right")
    plt.ylabel("Output length (chars)")
    plt.title(f"{title} Sweep: Output Length")
    plt.tight_layout()
    plt.show()


NameError: name 'df_temp' is not defined