<a href="https://colab.research.google.com/github/wesslen/llm-experiments/blob/main/notebooks/nondeterminism/structured_data/gemini.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Google's Gemini 1.0 Pro Non-Determinism Experiment Analysis

## Experiment Overview
* Tests the output consistency of Google's [Gemini 1.0 Pro](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.0-pro) (`'gemini-pro'`) when it is asked for structured JSON responses
* Uses three different prompt types:
  - Exercise benefits (descriptive/analytical)
  - Random numbers (explicit randomness)
  - Major cities (factual)
* Runs 20 iterations per prompt
* Measures variation using per-field Shannon entropy (natural log, computed with `scipy.stats.entropy`) and unique response counts

## Hypothesis
* Exercise benefits: Expected moderate variation (entropy ~1.0-1.5)
* Random numbers: Expected high variation (entropy ~2.0-2.5)
* City names: Expected low variation (entropy ~0.5-1.0)

## Results Analysis
TBD

In [22]:
!uv pip install --system google-generativeai

Using Python 3.10.12 environment at /usr
Audited 1 package in 73ms


In [23]:
import os
from google.colab import userdata
# Copy the GOOGLE_API_KEY value returned by Colab's `userdata` helper into the
# process environment so later code can read it with os.getenv("GOOGLE_API_KEY").
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

In [26]:
import datetime
import json
import os
import re
import statistics
from collections import Counter, defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional

import google.generativeai as genai
import numpy as np
import pandas as pd
from scipy.stats import entropy

def _default_prompts() -> List[str]:
    """Return a fresh list holding the three built-in experiment prompts.

    The prompts ask, in order, for exercise benefits, random numbers and
    major city names, each as a three-key JSON object.
    """
    exercise_prompt = """Please provide exactly three key benefits of exercise.
                Your response must be valid JSON that looks exactly like this format:
                {
                    "benefit1": "your first benefit here",
                    "benefit2": "your second benefit here",
                    "benefit3": "your third benefit here"
                }
                Provide ONLY the JSON, with no additional text or explanation."""

    numbers_prompt = """Generate three random numbers between 1-100.
                Your response must be valid JSON that looks exactly like this format:
                {
                    "num1": first_number,
                    "num2": second_number,
                    "num3": third_number
                }
                Use actual numbers, not strings. Provide ONLY the JSON, with no additional text."""

    cities_prompt = """Name three major cities.
                Your response must be valid JSON that looks exactly like this format:
                {
                    "city1": "first city name",
                    "city2": "second city name",
                    "city3": "third city name"
                }
                Provide ONLY the JSON, with no additional text."""

    return [exercise_prompt, numbers_prompt, cities_prompt]


@dataclass
class ExperimentConfig:
    """Settings for one experiment run.

    ``num_iterations`` is how many times each prompt is sent; ``prompts`` is
    the list of prompt strings. Leaving ``prompts`` as ``None`` selects the
    three built-in prompts.
    """

    num_iterations: int = 20
    prompts: List[str] = None

    def __post_init__(self):
        # An explicitly supplied list (even an empty one) is kept untouched.
        if self.prompts is not None:
            return
        self.prompts = _default_prompts()

class NonDeterminismExperiment:
    """Send each configured prompt to Gemini repeatedly and measure answer variation.

    Attributes:
        model: The ``genai.GenerativeModel('gemini-pro')`` handle used for requests.
        config: The experiment configuration (prompts and iteration count).
        results: Maps each prompt string to the list of parsed JSON objects
            collected for it.
    """

    def __init__(self, api_key: str, config: "ExperimentConfig"):
        """Configure the Gemini client with ``api_key`` and store ``config``."""
        genai.configure(api_key=api_key)
        # Initialize Gemini Pro model
        self.model = genai.GenerativeModel('gemini-pro')
        self.config = config
        self.results = defaultdict(list)

    def extract_json_from_text(self, text: str) -> str:
        """Return the first substring of ``text`` that parses as a JSON object.

        Each ``{`` is tried in turn as the start of a JSON value, so nested
        objects, braces inside string values, Markdown code fences and
        trailing chatter are all handled. If no candidate parses, ``text`` is
        returned unchanged so the caller's own parse attempt reports the error.
        """
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                _, end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                # Not a valid object at this brace; try the next one.
                start = text.find("{", start + 1)
            else:
                return text[start:end]
        return text

    def get_gemini_response(self, prompt: str) -> Optional[Dict]:
        """Send ``prompt`` to Gemini once and return the reply parsed as a JSON object.

        Returns:
            The parsed ``dict``, or ``None`` when the request fails, the reply
            is not valid JSON, or the JSON is not an object. Failures are
            printed rather than raised so one bad iteration does not abort
            the run.
        """
        try:
            # Configure generation parameters
            # NOTE(review): top_k=1 limits sampling to the single most likely
            # token, which presumably makes temperature/top_p moot -- confirm
            # this is the intended setup for a non-determinism experiment.
            generation_config = genai.types.GenerationConfig(
                temperature=1,
                top_p=1,
                top_k=1,
                max_output_tokens=1024,
            )

            # Get response from Gemini
            response = self.model.generate_content(
                prompt,
                generation_config=generation_config
            )

            # Extract the text content
            response_text = response.text.strip()
        except Exception as e:
            # Boundary around the remote call: report and skip this iteration.
            print(f"Error getting response: {e}")
            return None

        # Try to extract JSON if it's embedded in other text
        json_text = self.extract_json_from_text(response_text)

        try:
            parsed = json.loads(json_text)
        except json.JSONDecodeError as e:
            print(f"Failed to parse JSON: {e}")
            print(f"Response text: {response_text}")
            return None

        # The analysis step reads responses by key, so a bare list/number/string
        # is treated as an invalid response instead of failing later.
        if not isinstance(parsed, dict):
            print(f"Expected a JSON object but got {type(parsed).__name__}")
            print(f"Response text: {response_text}")
            return None
        return parsed

    def run_experiment(self):
        """Query every configured prompt ``num_iterations`` times and store valid replies.

        Replies for which ``get_gemini_response`` returns ``None`` are dropped;
        the rest are stored in ``self.results`` under the prompt string.
        """
        for prompt in self.config.prompts:
            print(f"\nRunning experiment for prompt: {prompt[:50]}...")  # Show just the start of the prompt

            responses = []
            for i in range(self.config.num_iterations):
                # '\r' keeps the progress counter on a single console line.
                print(f"Iteration {i+1}/{self.config.num_iterations}", end='\r')
                response = self.get_gemini_response(prompt)
                if response is not None:
                    responses.append(response)

            self.results[prompt] = responses
            print(f"\nCollected {len(responses)} valid responses")

    @staticmethod
    def _countable(value):
        """Return a hashable stand-in for a parsed JSON value.

        Lists and dicts are not hashable, so they are replaced by their
        canonical (key-sorted) JSON text; scalars are returned unchanged.
        """
        if isinstance(value, (dict, list)):
            return json.dumps(value, sort_keys=True)
        return value

    def analyze_results(self) -> Dict:
        """Compute per-prompt and per-field variation statistics from ``self.results``.

        Returns:
            A dict keyed by prompt. Each entry holds ``total_responses``,
            ``unique_responses`` (objects compared independently of key
            order) and ``field_analysis``, which maps every field seen in any
            response to its ``unique_values`` count, ``value_frequencies``
            and ``entropy`` (``scipy.stats.entropy`` with its default base).
            A field's statistics cover only the responses that contain it.
            Prompts with no valid responses are omitted.
        """
        analysis = {}

        for prompt, responses in self.results.items():
            if not responses:  # Skip if no valid responses
                continue

            prompt_analysis = {
                "total_responses": len(responses),
                # sort_keys makes {"a": 1, "b": 2} and {"b": 2, "a": 1} compare equal.
                "unique_responses": len(
                    {json.dumps(r, sort_keys=True) for r in responses}
                ),
                "field_analysis": {}
            }

            # Union of field names across all responses, in first-seen order,
            # so a response with missing or extra keys does not break analysis.
            field_names = list(dict.fromkeys(name for r in responses for name in r))

            for field in field_names:
                values = [self._countable(r[field]) for r in responses if field in r]
                counts = Counter(values)
                total = len(values)

                field_stats = {
                    "unique_values": len(counts),
                    "value_frequencies": dict(counts),
                    "entropy": float(
                        entropy([count / total for count in counts.values()])
                    ),
                }
                prompt_analysis["field_analysis"][field] = field_stats

            analysis[prompt] = prompt_analysis

        return analysis

    def save_results(self, output_dir: str = "experiment_results"):
        """Write raw results, analysis and a text report to a timestamped folder.

        Args:
            output_dir: Parent directory; a ``YYYYMMDD_HHMMSS`` subdirectory
                is created inside it.

        Returns:
            The ``Path`` of the subdirectory containing ``raw_results.json``,
            ``analysis.json`` and ``summary_report.txt``.
        """
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = Path(output_dir) / timestamp
        output_path.mkdir(parents=True, exist_ok=True)

        # Save raw results
        with open(output_path / "raw_results.json", "w") as f:
            json.dump(dict(self.results), f, indent=2)

        # Save analysis
        analysis = self.analyze_results()
        with open(output_path / "analysis.json", "w") as f:
            json.dump(analysis, f, indent=2)

        # Generate summary report
        self.generate_report(analysis, output_path / "summary_report.txt")

        return output_path

    def generate_report(self, analysis: Dict, output_file: Path):
        """Write a human-readable summary of ``analysis`` to ``output_file``.

        Args:
            analysis: The structure returned by ``analyze_results``.
            output_file: Destination path; it is opened in write mode.
        """
        with open(output_file, "w") as f:
            f.write("Gemini Non-Determinism Experiment Summary\n")
            f.write("=======================================\n\n")

            for prompt, results in analysis.items():
                f.write(f"Prompt: {prompt[:100]}...\n")  # Show just the start of the prompt
                f.write(f"Total responses: {results['total_responses']}\n")
                f.write(f"Unique responses: {results['unique_responses']}\n")
                f.write("\nField Analysis:\n")

                for field, stats in results['field_analysis'].items():
                    f.write(f"\n{field}:\n")
                    f.write(f"  Unique values: {stats['unique_values']}\n")
                    f.write(f"  Entropy: {stats['entropy']:.3f}\n")
                    f.write("  Value frequencies:\n")
                    for val, freq in stats['value_frequencies'].items():
                        f.write(f"    {val}: {freq}\n")
                f.write("\n" + "="*50 + "\n")

def run_notebook_experiment(api_key: str = None):
    """Run the experiment with the default configuration and print its report.

    Args:
        api_key: Gemini API key. When ``None``, the ``GOOGLE_API_KEY``
            environment variable is used instead.

    Returns:
        The ``NonDeterminismExperiment`` instance after it has run and saved
        its results.

    Raises:
        ValueError: If no non-empty API key is available.
    """
    resolved_key = os.getenv("GOOGLE_API_KEY") if api_key is None else api_key
    if not resolved_key:
        raise ValueError("Please set GOOGLE_API_KEY environment variable or pass it as a parameter")

    default_config = ExperimentConfig()
    runner = NonDeterminismExperiment(resolved_key, default_config)
    runner.run_experiment()
    results_dir = runner.save_results()

    # Echo the saved summary so it shows up in the notebook output.
    report_file = results_dir / "summary_report.txt"
    with open(report_file, "r") as handle:
        print(handle.read())

    return runner

In [28]:
# Run the experiment with the default configuration. No key is passed, so
# run_notebook_experiment reads it from the GOOGLE_API_KEY environment variable.
experiment = run_notebook_experiment()


Running experiment for prompt: Please provide exactly three key benefits of exerc...
Error getting response: HTTPConnectionPool(host='localhost', port=46873): Read timed out. (read timeout=600.0)


KeyboardInterrupt: 