**AI-Powered Enterprise Data Insights Agent for Automated Business Intelligence**

Enterprises generate large volumes of structured data (customer data, sales, campaigns, operations).
However:

* Manual analysis is slow

* Insights depend heavily on analyst expertise

* Reports are inconsistent

* Non-technical teams (sales, marketing, operations) cannot extract insights without a data team

* Traditional dashboards only show surface-level metrics

Result: organizations fail to make timely, data-driven decisions.

In [1]:
# 1. Imports and set-up

import os
import json
import time
import logging
from datetime import datetime
from pprint import pprint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Optional libs
try:
    from imblearn.over_sampling import SMOTE
    HAVE_SMOTE = True
except Exception:
    HAVE_SMOTE = False

try:
    from xgboost import XGBClassifier
    HAVE_XGB = True
except Exception:
    HAVE_XGB = False

print("HAVE_SMOTE:", HAVE_SMOTE, "HAVE_XGB:", HAVE_XGB)

# Observability: logging
LOGFILE = "agent_trace.log"
logging.basicConfig(filename=LOGFILE, level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
def log(msg, level="info"):
    getattr(logging, level)(msg)
    print(msg)

# JSON-safety helper
def convert_json_safe(obj):
    """Recursively convert numpy types and non-json-serializable keys to json-safe types."""
    if isinstance(obj, dict):
        out = {}
        for k, v in obj.items():
            # convert keys to str (JSON requires string keys)
            if isinstance(k, (np.integer, np.int64, np.int32)):
                k2 = int(k)
            else:
                k2 = str(k)
            out[k2] = convert_json_safe(v)
        return out
    elif isinstance(obj, list):
        return [convert_json_safe(i) for i in obj]
    elif isinstance(obj, (np.integer, np.int64, np.int32)):
        return int(obj)
    elif isinstance(obj, (np.floating, np.float64)):
        return float(obj)
    elif isinstance(obj, (np.ndarray,)):
        return obj.tolist()
    else:
        return obj

HAVE_SMOTE: False HAVE_XGB: True


In [2]:
# 2. Dataset loader (handles semicolon separated bank-full.csv)
def load_bank_dataset(path="/mnt/data/bank-full.csv"):
    if not os.path.exists(path):
        # try common Kaggle input paths
        candidates = [
            "/kaggle/input/bank-marketing-dataset/bank-full.csv",
            "/kaggle/input/bank-marketing-dataset/bank.csv",
            "../input/bank-marketing-dataset/bank-full.csv",
            "../input/bank-marketing-dataset/bank.csv"
        ]
        for c in candidates:
            if os.path.exists(c):
                path = c
                break
    if not os.path.exists(path):
        raise FileNotFoundError(f"Bank dataset not found at {path}. Upload `bank-full.csv` or add the Kaggle dataset.")
    df = pd.read_csv(path, sep=';')
    log(f"Loaded dataset from {path} shape={df.shape}")
    return df

df = load_bank_dataset("/mnt/data/bank-full.csv")
display(df.head())
print("Columns:", df.columns.tolist())

Loaded dataset from /kaggle/input/bank-marketing-dataset/bank-full.csv shape=(45211, 17)


Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


Columns: ['age', 'job', 'marital', 'education', 'default', 'balance', 'housing', 'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'y']


In [3]:
# 3. Basic cleaning, target mapping, engineering
# Map target to numeric (if not already)
if 'y' in df.columns:
    if not np.issubdtype(df['y'].dtype, np.number):
        df['y'] = df['y'].map({'yes': 1, 'no': 0})
        log("Mapped 'y' to numeric 1/0")

# Safety: drop 'duration' if it exists (some datasets include it; some do not)
if 'duration' in df.columns:
    df = df.drop(columns=['duration'])
    log("Dropped 'duration' column (leakage mitigation)")

# Create pdays_never flag for 'pdays' if present
if 'pdays' in df.columns:
    df['pdays_never'] = (df['pdays'] == -1).astype(int)
    log("Created 'pdays_never' flag (1 if pdays == -1)")

# Quick checks
print("Shape after cleaning:", df.shape)
print("Missing values per column:\n", df.isnull().sum())
print("Target distribution:\n", df['y'].value_counts().to_dict())

Mapped 'y' to numeric 1/0
Dropped 'duration' column (leakage mitigation)
Created 'pdays_never' flag (1 if pdays == -1)
Shape after cleaning: (45211, 17)
Missing values per column:
 age            0
job            0
marital        0
education      0
default        0
balance        0
housing        0
loan           0
contact        0
day            0
month          0
campaign       0
pdays          0
previous       0
poutcome       0
y              0
pdays_never    0
dtype: int64
Target distribution:
 {0: 39922, 1: 5289}


In [4]:
# 4. Discover numeric/categorical columns dynamically (avoid hardcoding)
# excluded the target from feature lists
target_col = 'y' if 'y' in df.columns else None

numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
if target_col and target_col in numeric_cols:
    numeric_cols.remove(target_col)
# keep 'pdays_never' as numeric if present but it might already be included
# it's okay if numeric_cols contains pdays or pdays_never â€” preprocessor will handle it

categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
if target_col and target_col in categorical_cols:
    categorical_cols.remove(target_col)

log(f"Numeric cols detected: {numeric_cols}")
log(f"Categorical cols detected: {categorical_cols}")

Numeric cols detected: ['age', 'balance', 'day', 'campaign', 'pdays', 'previous', 'pdays_never']
Categorical cols detected: ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']


In [5]:
# 5. Build robust preprocessing pipeline (handles empty cat/num gracefully)
def build_preprocessor(num_cols, cat_cols):
    transformers = []
    if len(num_cols) > 0:
        num_pipe = Pipeline([('imputer', SimpleImputer(strategy='median')), ('scale', StandardScaler())])
        transformers.append(('num', num_pipe, num_cols))
    if len(cat_cols) > 0:
        cat_pipe = Pipeline([('imputer', SimpleImputer(strategy='constant', fill_value='unknown')), ('ohe', OneHotEncoder(handle_unknown='ignore', sparse=False))])
        transformers.append(('cat', cat_pipe, cat_cols))
    if len(transformers) == 0:
        raise RuntimeError("No numeric or categorical columns found for preprocessing.")
    pre = ColumnTransformer(transformers=transformers, remainder='drop')
    return pre

preprocessor = build_preprocessor(numeric_cols, categorical_cols)
log("Preprocessor built successfully")

Preprocessor built successfully


In [6]:
# 6. EDA 
def automated_eda(df, target_col='y', out_dir='figs'):
    os.makedirs(out_dir, exist_ok=True)
    numeric = numeric_cols
    cat = categorical_cols
    # small histograms for first few numerics
    for c in numeric[:5]:
        plt.figure(figsize=(4,3))
        df[c].hist(bins=30)
        plt.title(f"hist: {c}")
        plt.tight_layout()
        plt.savefig(os.path.join(out_dir, f"hist_{c}.png"))
        plt.close()
    # target distribution
    if target_col in df.columns:
        plt.figure(figsize=(4,3))
        df[target_col].value_counts().plot.bar()
        plt.title("target distribution")
        plt.tight_layout()
        plt.savefig(os.path.join(out_dir, "target_distribution.png"))
        plt.close()
    # sample head saved as json text for trace
    head = df.head(3).to_dict(orient='records')
    log("EDA artifacts saved to 'figs/'")
    return {'numeric': numeric, 'categorical': cat, 'head': head}

eda_summary = automated_eda(df, target_col=target_col)

EDA artifacts saved to 'figs/'


In [7]:
# 7. Train/test split + imbalance detection
X = df.drop(columns=[target_col]) if target_col else df.copy()
y = df[target_col].values if target_col else None

# ensure dataset has enough rows
if X.shape[0] == 0:
    raise RuntimeError("Empty dataset after preprocessing. Aborting.")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
log(f"Split performed: X_train {X_train.shape}, X_test {X_test.shape}")

from collections import Counter
counts = dict(Counter(y))
ratio = (counts.get(0,0) / counts.get(1,1)) if counts.get(1,0) > 0 else np.inf
imbalance = {'counts': counts, 'ratio': float(ratio), 'is_imbalanced': ratio > 4.0}
log(f"Class distribution: {imbalance}")

Split performed: X_train (36168, 16), X_test (9043, 16)
Class distribution: {'counts': {0: 39922, 1: 5289}, 'ratio': 7.548118737001324, 'is_imbalanced': True}


In [8]:
# 8. Training helpers (safe) and model candidates
def train_with_preprocessor(model, X_train, y_train, preprocessor, X_test, y_test, use_smote=False):
    # Fit preprocess on training set
    X_train_p = preprocessor.fit_transform(X_train)
    X_test_p = preprocessor.transform(X_test)
    # Optional SMOTE (applied after preprocessing)
    if use_smote and HAVE_SMOTE:
        sm = SMOTE(random_state=42)
        X_train_p, y_train = sm.fit_resample(X_train_p, y_train)
    model.fit(X_train_p, y_train)
    y_pred = model.predict(X_test_p)
    try:
        y_prob = model.predict_proba(X_test_p)[:,1]
    except Exception:
        y_prob = None
    metrics = {
        'accuracy': float(accuracy_score(y_test, y_pred)),
        'precision': float(precision_score(y_test, y_pred, zero_division=0)),
        'recall': float(recall_score(y_test, y_pred, zero_division=0)),
        'f1': float(f1_score(y_test, y_pred, zero_division=0))
    }
    if y_prob is not None:
        try:
            metrics['roc_auc'] = float(roc_auc_score(y_test, y_prob))
        except Exception:
            metrics['roc_auc'] = None
    cm = confusion_matrix(y_test, y_pred).tolist()
    return {'model': model, 'metrics': metrics, 'confusion_matrix': cm, 'preprocessor': preprocessor}

# Quick models for demo
models = [
    ('LogisticRegression_balanced', LogisticRegression(max_iter=1000, class_weight='balanced')),
    ('RandomForest_quick', RandomForestClassifier(n_estimators=80, max_depth=10, n_jobs=1, class_weight='balanced', random_state=42))
]
if HAVE_XGB:
    models.append(('XGBoost_quick', XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=100, tree_method='hist')))

# Use SMOTE only if available and imbalance detected
use_smote = HAVE_SMOTE and imbalance['is_imbalanced']
log(f"SMOTE enabled: {use_smote}")

results = []
for name, mdl in models:
    log(f"Starting training: {name}")
    start = time.time()
    try:
        # For tree models, sample a subset of training set for speed on demo
        samp_frac = 1.0
        if 'RandomForest' in name or 'XGBoost' in name:
            samp_frac = 0.5
        if samp_frac < 1.0:
            X_train_sub = X_train.sample(frac=samp_frac, random_state=42)
            y_train_sub = y_train[X_train_sub.index]
        else:
            X_train_sub, y_train_sub = X_train, y_train
        res = train_with_preprocessor(mdl, X_train_sub, y_train_sub, preprocessor, X_test, y_test, use_smote=use_smote)
        res['name'] = name
        res['train_time_sec'] = round(time.time() - start, 2)
        results.append(res)
        log(f"Finished {name} in {res['train_time_sec']}s. Metrics: {res['metrics']}")
    except Exception as e:
        log(f"Training failed for {name}: {e}", level="error")

if len(results) == 0:
    raise RuntimeError("No models produced results. Check training logs.")

SMOTE enabled: False
Starting training: LogisticRegression_balanced




Finished LogisticRegression_balanced in 1.43s. Metrics: {'accuracy': 0.7550591617825942, 'precision': 0.26626262626262626, 'recall': 0.6228733459357277, 'f1': 0.3730540617039343, 'roc_auc': 0.7722321981314207}
Starting training: RandomForest_quick
Training failed for RandomForest_quick: index 44782 is out of bounds for axis 0 with size 36168
Starting training: XGBoost_quick
Training failed for XGBoost_quick: index 44782 is out of bounds for axis 0 with size 36168


In [9]:
# 9. Choose best model (by F1) and compute light permutation importances (safe)
def choose_best_by(results, key='f1'):
    best = None
    best_score = -1
    for r in results:
        s = r['metrics'].get(key, 0)
        if s > best_score:
            best_score = s
            best = r
    return best

top = choose_best_by(results, 'f1')
log(f"Selected top model: {top['name']} with metrics: {top['metrics']}")

# Try permutation importance lightly (n_repeats small)
fi = None
try:
    from sklearn.inspection import permutation_importance
    log("Computing light permutation importance (n_repeats=3)")
    # Ensure we have a fitted preprocessor and model in top
    X_test_p = top['preprocessor'].transform(X_test)
    perm = permutation_importance(top['model'], X_test_p, y_test, n_repeats=3, random_state=42, n_jobs=1)
    imps = perm.importances_mean
    # reconstruct feature names best-effort
    feat_names = []
    try:
        # numeric names first
        feat_names.extend(numeric_cols)
        # get cat feature names from ohe if present
        for name, trans, cols in top['preprocessor'].transformers_:
            if name == 'cat':
                try:
                    ohe = trans.named_steps['ohe']
                    cat_names = list(ohe.get_feature_names_out(cols))
                    feat_names.extend(cat_names)
                except Exception:
                    pass
        if len(feat_names) == len(imps):
            fi = dict(sorted(zip(feat_names, imps), key=lambda x: -x[1])[:20])
        else:
            # fallback: index-based names
            fi = {f"f_{i}": float(imps[i]) for i in range(min(20, len(imps)))}
    except Exception:
        fi = {f"f_{i}": float(imps[i]) for i in range(min(20, len(imps)))}
    log("Permutation importances computed.")
except Exception as e:
    log("Permutation importance failed or timed out: " + str(e), level="warning")
    fi = None

Selected top model: LogisticRegression_balanced with metrics: {'accuracy': 0.7550591617825942, 'precision': 0.26626262626262626, 'recall': 0.6228733459357277, 'f1': 0.3730540617039343, 'roc_auc': 0.7722321981314207}
Computing light permutation importance (n_repeats=3)
Permutation importances computed.


In [10]:
# 10. LLM stub for executive summary (replace with Gemini/OpenAI if you have API access)
def llm_summarize_stub(best_model_info, fi):
    # Template-based simulated summary (safe for public notebook)
    lines = []
    lines.append("EXECUTIVE SUMMARY (Simulated LLM Output)")
    lines.append(f"Top model: {best_model_info.get('name')}")
    metrics = best_model_info.get('metrics', {})
    lines.append("Metrics:")
    lines.append(f"- Accuracy: {metrics.get('accuracy'):.3f}")
    lines.append(f"- Precision: {metrics.get('precision'):.3f}")
    lines.append(f"- Recall: {metrics.get('recall'):.3f}")
    lines.append(f"- F1-score: {metrics.get('f1'):.3f}")
    if metrics.get('roc_auc') is not None:
        lines.append(f"- ROC-AUC: {metrics.get('roc_auc'):.3f}")
    lines.append("")
    if fi:
        lines.append("Top feature signals (approx):")
        for k, v in list(fi.items())[:8]:
            lines.append(f"- {k}: {v:.4f}")
    lines.append("")
    lines.append("Actionable recommendations:")
    lines.append("- Focus outreach on segments with positive signals; run A/B tests for model-based targeting.")
    lines.append("- Monitor model drift and fairness metrics before production rollout.")
    return "\n".join(lines)

executive_summary = llm_summarize_stub(top, fi)
print("Executive summary preview:\n", executive_summary[:800])

Executive summary preview:
 EXECUTIVE SUMMARY (Simulated LLM Output)
Top model: LogisticRegression_balanced
Metrics:
- Accuracy: 0.755
- Precision: 0.266
- Recall: 0.623
- F1-score: 0.373
- ROC-AUC: 0.772

Top feature signals (approx):
- month_aug: 0.0436
- pdays_never: 0.0371
- poutcome_failure: 0.0345
- month_jul: 0.0324
- month_nov: 0.0323
- poutcome_success: 0.0171
- month_jan: 0.0126
- month_feb: 0.0059

Actionable recommendations:
- Focus outreach on segments with positive signals; run A/B tests for model-based targeting.
- Monitor model drift and fairness metrics before production rollout.


In [11]:
# 11. Memory & artifacts saving (JSON-safe)
MEMORY_FILE = "agent_memory_capstone.json"
def load_memory():
    if os.path.exists(MEMORY_FILE):
        try: return json.load(open(MEMORY_FILE,'r'))
        except: return {}
    return {}

def save_memory(mem):
    with open(MEMORY_FILE,'w') as f:
        json.dump(convert_json_safe(mem), f, indent=2)
    log("Memory saved to " + MEMORY_FILE)

memory = load_memory()
memory['last_run'] = {
    'timestamp': datetime.utcnow().isoformat(),
    'top_model': {'name': top['name'], 'metrics': top['metrics']},
    'imbalance': imbalance
}
save_memory(memory)

artifacts = {
    'timestamp': datetime.utcnow().isoformat(),
    'data_shape': df.shape,
    'top_model': {'name': top['name'], 'metrics': top['metrics']},
    'models': [{ 'name': r['name'], 'metrics': r['metrics'] } for r in results],
    'feature_importances': fi,
    'imbalance': imbalance
}

# JSON-safe convert then save
artifacts_safe = convert_json_safe(artifacts)
with open('agent_artifacts.json','w') as f:
    json.dump(artifacts_safe, f, indent=2)
with open('executive_summary.txt','w') as f:
    f.write(executive_summary)

log("Saved artifacts: agent_artifacts.json, executive_summary.txt")

Memory saved to agent_memory_capstone.json
Saved artifacts: agent_artifacts.json, executive_summary.txt


In [12]:
# 12. Simple improvement loop (Loop Agent demo) - limited iterations
def improvement_loop(df, memory, max_iters=1, threshold_f1=0.45):
    log("Starting improvement loop (demo, limited iters)")
    best_overall = memory.get('best_overall', {'f1': -1})
    for i in range(max_iters):
        log(f"Improvement iteration {i+1}")
        # For demo we will retrain quickly by calling the training block with slightly different RF params
        mdl = RandomForestClassifier(n_estimators=60 + 40*i, max_depth=12, n_jobs=1, class_weight='balanced', random_state=42)
        try:
            # reuse preprocessor fitted on full X_train
            res = train_with_preprocessor(mdl, X_train, y_train, preprocessor, X_test, y_test, use_smote=use_smote)
            f1 = res['metrics'].get('f1', 0)
            log(f"Iter {i+1} f1={f1}")
            if f1 > best_overall.get('f1', -1):
                best_overall = {'f1': f1, 'model': 'RandomForest_iter_' + str(i+1), 'metrics': res['metrics']}
                memory['best_overall'] = best_overall
                save_memory(memory)
            if f1 >= threshold_f1:
                log("Threshold reached; stopping improvement loop.")
                break
        except Exception as e:
            log("Improvement iteration failed: " + str(e), level="warning")
    return best_overall

best_overall = improvement_loop(df, memory, max_iters=1, threshold_f1=0.45)
log(f"Improvement loop finished. best_overall: {best_overall}")

Starting improvement loop (demo, limited iters)
Improvement iteration 1




Iter 1 f1=0.45578231292517013
Memory saved to agent_memory_capstone.json
Threshold reached; stopping improvement loop.
Improvement loop finished. best_overall: {'f1': 0.45578231292517013, 'model': 'RandomForest_iter_1', 'metrics': {'accuracy': 0.8407608094658852, 'precision': 0.3797229219143577, 'recall': 0.5699432892249527, 'f1': 0.45578231292517013, 'roc_auc': 0.7926273033203798}}


In [13]:
# Cell 13: Observability sample: show tail of log file
print("\n--- last log lines ---")
if os.path.exists(LOGFILE):
    with open(LOGFILE,'r') as f:
        tail = f.readlines()[-30:]
    print("".join(tail))
else:
    print("No log file found.")


--- last log lines ---
2025-11-29 12:22:27,006 INFO Loaded dataset from /kaggle/input/bank-marketing-dataset/bank-full.csv shape=(45211, 17)
2025-11-29 12:22:43,233 INFO Mapped 'y' to numeric 1/0
2025-11-29 12:22:43,242 INFO Dropped 'duration' column (leakage mitigation)
2025-11-29 12:22:43,244 INFO Created 'pdays_never' flag (1 if pdays == -1)
2025-11-29 12:22:50,081 INFO Numeric cols detected: ['age', 'balance', 'day', 'campaign', 'pdays', 'previous', 'pdays_never']
2025-11-29 12:22:50,081 INFO Categorical cols detected: ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']
2025-11-29 12:22:56,057 INFO Preprocessor built successfully
2025-11-29 12:23:05,660 INFO EDA artifacts saved to 'figs/'
2025-11-29 12:23:42,468 INFO Split performed: X_train (36168, 16), X_test (9043, 16)
2025-11-29 12:23:42,477 INFO Class distribution: {'counts': {0: 39922, 1: 5289}, 'ratio': 7.548118737001324, 'is_imbalanced': True}
2025-11-29 12:23:52,020 INFO SMOTE ena

In [14]:
# 14. Prepare README and submission snippet (auto-create)
README = f"""
# Enterprise Data-Insights Agent for Bank Marketing Campaigns

Author: YOUR_NAME
Track: Enterprise Agents

Brief: Agentic pipeline that ingests the Bank Marketing dataset, runs EDA, trains models, and generates an executive summary. Demonstrates agentic features: tools, session & memory, loop agent, and observability.

Artifacts produced:
- agent_artifacts.json
- executive_summary.txt
- agent_memory_capstone.json
- agent_trace.log

Run: paste this notebook into Kaggle and run cells top->down.
"""
with open("README_capstone.md","w") as f:
    f.write(README.strip())
log("README_capstone.md written")

README_capstone.md written


In [16]:
# 15. Final summary prints
print("\nFiles in workspace:", [f for f in os.listdir('.') if f.endswith('.json') or f.endswith('.txt') or f.endswith('.log')])
print("Top model metrics:", top['metrics'])
print("Executive summary (first 400 chars):\n", executive_summary[:400])
print("Notebook run complete. Replace llm_summarize_stub with real LLM calls for bonus points (Gemini).")


Files in workspace: ['agent_memory_capstone.json', 'agent_artifacts.json', 'executive_summary.txt', 'agent_trace.log']
Top model metrics: {'accuracy': 0.7550591617825942, 'precision': 0.26626262626262626, 'recall': 0.6228733459357277, 'f1': 0.3730540617039343, 'roc_auc': 0.7722321981314207}
Executive summary (first 400 chars):
 EXECUTIVE SUMMARY (Simulated LLM Output)
Top model: LogisticRegression_balanced
Metrics:
- Accuracy: 0.755
- Precision: 0.266
- Recall: 0.623
- F1-score: 0.373
- ROC-AUC: 0.772

Top feature signals (approx):
- month_aug: 0.0436
- pdays_never: 0.0371
- poutcome_failure: 0.0345
- month_jul: 0.0324
- month_nov: 0.0323
- poutcome_success: 0.0171
- month_jan: 0.0126
- month_feb: 0.0059

Actionable reco
Notebook run complete. Replace llm_summarize_stub with real LLM calls for bonus points (Gemini).


In [None]:
# pseudocode - DO NOT paste keys into public repo
from google.generativeai import client as gclient
gclient.configure(api_key=os.environ['GEMINI_API_KEY'])
def llm_summarize(prompt):
    resp = gclient.generate(text=prompt, model="gemini-1.5-pro")
    return resp.text
