In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the input directory and print the full path of every file.
# The middle element of each os.walk() tuple (the sub-directory names) is not needed here.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # One output line per file: containing directory joined with the file name.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ai-mathematical-olympiad-progress-prize-3/reference.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/AIMO3_Reference_Problems.pdf
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/sample_submission.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/aimo_3_inference_server.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/aimo_3_gateway.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/__init__.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/core/templates.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/core/base_gateway.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/core/relay.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/core/kaggle_evaluation.proto
/kaggle/input/ai-mathematical-olympiad-progre

In [3]:
# ========== AI MATHEMATICAL OLYMPIAD - OFFLINE LEARNING SYSTEM ==========
# This code runs completely offline and learns from practice questions
# Then generates submission.csv with id and single-digit integer answers

import os, sys, json, re, hashlib, time, logging, multiprocessing, gc
import numpy as np
import pandas as pd
import polars as pl
import torch
import sympy
from transformers import AutoTokenizer, AutoModel, AutoConfig
from sklearn.model_selection import KFold
import lightgbm as lgb
import wandb

# ========== WANDB OFFLINE MODE ==========
# Set to offline mode - no internet required
# Starts the W&B run that every later wandb.log / wandb.config.update call in this
# cell writes to.
# NOTE(review): the numeric entries in `config` below are hand-written copies of
# settings that are defined separately further down (SOLVER_TIMEOUT, the KFold
# split count and the LGBMRegressor arguments). Nothing in this cell reads them
# back from wandb.config, so they have to be kept in sync manually.
wandb.init(
    project="ai-mathematical-olympiad-progress-prize-3",
    mode="offline",  # CRITICAL: Runs without internet
    config={
        "competition": "AI Mathematical Olympiad Progress Prize 3",
        "framework": "LightGBM + Transformers",
        "task": "mathematical_problem_solving",
        "solver_timeout": 25,
        "k_folds": 5,
        "lgb_n_estimators": 50,
        "lgb_learning_rate": 0.1,
        "offline_mode": True
    }
)

# ========== SETUP & PATHS ==========
# Put the competition dataset directory on sys.path so the bundled
# `kaggle_evaluation` package can be imported.
sys.path.append('/kaggle/input/ai-mathematical-olympiad-progress-prize-3')
import kaggle_evaluation.aimo_3_inference_server

# Directory passed to the transformers `from_pretrained` loaders in ReasoningEngine.
MODEL_PATH = "/kaggle/input/all-minilm-l6-v2-tuning-model-add"
# Practice problems; train_ensemble() reads its 'problem' and 'answer' columns.
REF_DATA_PATH = "/kaggle/input/ai-mathematical-olympiad-progress-prize-3/reference.csv"
# Test problems; the main block reads its 'id' and 'problem' columns.
TEST_DATA_PATH = "/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv"
# NOTE(review): CACHE_PATH is not referenced anywhere else in this cell - confirm
# whether it is still needed.
CACHE_PATH = "/kaggle/working/aimo_cache.json"
# Seconds solve_with_reasoning() waits for the solver subprocess before terminating it.
SOLVER_TIMEOUT = 25 

# Root logging at INFO level plus a named logger for this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("AIMO_LOGIC")

# Record the resolved paths in the W&B run configuration.
wandb.config.update({
    "model_path": MODEL_PATH,
    "ref_data_path": REF_DATA_PATH,
    "test_data_path": TEST_DATA_PATH
})

# ========== METHOD 1: LOGICAL FEATURE EXTRACTION ==========
def extract_logical_features(text):
    """
    METHOD: Extract mathematical domain and complexity features.

    Args:
        text: Problem statement as a string (may contain LaTeX).

    Returns:
        dict with six numeric entries. The insertion order is part of the
        contract because callers flatten the dict with ``list(d.values())``
        and index the result by position:
          0. is_mod_premise     - 1 if the text mentions mod / modulo / modulus /
                                  moduli / modular as a whole word, or the LaTeX
                                  macros pmod / bmod; else 0
          1. is_geom_premise    - 1 if any of 'triangle', 'circle', 'radius',
                                  'angle' occurs as a substring; else 0
          2. is_algebra_premise - 1 if 'equation' or 'solve for' occurs; else 0
          3. token_count        - number of whitespace-separated tokens
          4. digit_density      - digit characters divided by (len(text) + 1)
          5. math_symbol_count  - count of the characters + - * / = ^ < > ( )
    """
    text_lower = text.lower()

    # Whole-word match: a plain substring test for 'mod' also fires on ordinary
    # words such as "model", "modern" or "modify". The look-arounds only forbid
    # adjacent letters, so forms like "mod7", "(mod 7)" and "\pmod{7}" still match.
    mentions_mod = re.search(
        r'(?<![a-z])[pb]?mod(?:ulo|ulus|uli|ular)?(?![a-z])', text_lower
    ) is not None

    features = {
        # Domain identification (Premise)
        'is_mod_premise': 1 if mentions_mod else 0,
        'is_geom_premise': 1 if any(w in text_lower for w in ['triangle', 'circle', 'radius', 'angle']) else 0,
        'is_algebra_premise': 1 if 'equation' in text_lower or 'solve for' in text_lower else 0,

        # Complexity mapping (Analogy)
        'token_count': len(text.split()),
        # +1 in the denominator keeps the ratio defined for an empty string.
        'digit_density': len(re.findall(r'\d', text)) / (len(text) + 1),
        'math_symbol_count': len(re.findall(r'[\+\-\*\/\=\^\<\>\(\)]', text))
    }
    return features

# ========== METHOD 2: SYMBOLIC SOLVER (DISJUNCTIVE ARGUMENT) ==========
def _disjunctive_logic(text, result_dict):
    """
    METHOD: Disjunctive Argument Solver
    REASONING: Either problem is Symbolic (Case A: modulo) OR Modular (Case B: equation).
    If neither works, defaults to ML (Case C). This is faster and more accurate than pure ML.
    """
    try:
        # Case A: Modular arithmetic (e.g., "123 \pmod{10}")
        mod_match = re.search(r'(\d+)\s*\\pmod\s*{\s*(\d+)\s*}', text)
        if mod_match:
            result = int(mod_match.group(1)) % int(mod_match.group(2))
            result_dict['ans'] = result % 10  # Ensure single digit
            result_dict['method'] = 'modulo'
            return

        # Case B: Symbolic equation solving (e.g., "$x^2 = 4$")
        math_match = re.search(r'\$(.*?)\$', text)
        if math_match and 'x' in math_match.group(1):
            expr = math_match.group(1).replace('^', '**').replace('=', '-')
            sol = sympy.solve(sympy.sympify(expr))
            if sol:
                result = int(abs(float(sol[0].evalf()))) % 10
                result_dict['ans'] = result
                result_dict['method'] = 'symbolic'
                return
    except:
        pass
    result_dict['ans'] = None
    result_dict['method'] = None

def solve_with_reasoning(text):
    """
    METHOD: Timeout-protected symbolic solver

    Runs ``_disjunctive_logic`` in a separate process so that a solve that hangs
    can be abandoned after ``SOLVER_TIMEOUT`` seconds.

    Args:
        text: Problem statement.

    Returns:
        (answer, method) as written by ``_disjunctive_logic``, or (None, None)
        when the solver found nothing or did not finish in time.
    """
    # Each Manager() starts its own server process. Using it as a context manager
    # shuts that process down when the block exits, so repeated calls do not
    # accumulate idle manager processes.
    with multiprocessing.Manager() as manager:
        result_dict = manager.dict({'ans': None, 'method': None})
        worker = multiprocessing.Process(target=_disjunctive_logic, args=(text, result_dict))
        worker.start()
        worker.join(SOLVER_TIMEOUT)
        if worker.is_alive():
            worker.terminate()
            # Reap the terminated child so it does not linger as a zombie.
            worker.join()
            return None, None
        # Read the values while the manager is still running.
        return result_dict['ans'], result_dict['method']

# ========== METHOD 3: TRANSFORMER REASONING ENGINE ==========
class ReasoningEngine:
    """
    METHOD: Transformer-based semantic reasoning

    Wraps a pre-trained transformer loaded from a local path and exposes a short
    embedding of a problem statement.

    Attributes:
        device: "cuda" when torch reports an available GPU, otherwise "cpu".
        tk, mdl: tokenizer and model, assigned while loading.
        ready: True once loading has completed; False if any step raised.
    """
    def __init__(self, path):
        """Load tokenizer and model from `path`; on failure mark the engine not ready."""
        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"
        wandb.config.update({"device": self.device})

        try:
            model_config = AutoConfig.from_pretrained(path)
            if not hasattr(model_config, 'model_type'):
                model_config.model_type = "bert"

            self.tk = AutoTokenizer.from_pretrained(path)

            # Half precision on GPU, full precision on CPU.
            on_gpu = self.device == "cuda"
            loaded = AutoModel.from_pretrained(
                path,
                config=model_config,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
            )
            self.mdl = loaded.to(self.device)
            self.mdl.eval()

            self.ready = True
            wandb.log({"model_loaded": 1, "model_ready": 1})
        except Exception as load_error:
            # The failure is recorded in W&B; callers check `ready`.
            self.ready = False
            wandb.log({"model_loaded": 0, "model_error": str(load_error)})

    def get_reasoning_vector(self, text):
        """
        Embed `text` and return the leading slice of the embedding.

        The last hidden state is averaged over the sequence axis and the slice
        ``[0, :16]`` of that result is returned as a NumPy array. When the engine
        is not ready, a zero vector of length 16 is returned instead.
        """
        if self.ready:
            encoded = self.tk(
                [text], return_tensors="pt", padding=True, truncation=True
            ).to(self.device)
            with torch.no_grad():
                outputs = self.mdl(**encoded, output_hidden_states=True)
                last_layer = outputs.hidden_states[-1]
                pooled = last_layer.mean(dim=1)
                return pooled.cpu().float().numpy()[0, :16]
        return np.zeros(16)

# ========== GLOBAL INITIALIZATION ==========
# Module-level state shared by the training and prediction functions below.
ENGINE = ReasoningEngine(MODEL_PATH)  # constructed once when this cell runs
LGB_MODELS = []  # fitted LightGBM regressors, filled by train_ensemble()
MEMO = {}  # Cache for solved problems: md5 of problem text -> (answer, reasoning)
REASONING_LOG = []  # Store reasoning for each prediction (dicts appended by predict())

# ========== METHOD 4: ENSEMBLE LEARNING FROM PRACTICE SET ==========
def train_ensemble():
    """
    METHOD: K-Fold Cross-Validation Ensemble Learning

    Fits one LightGBM regressor per K-fold split of the practice problems and
    stores the fitted models in the module-level ``LGB_MODELS`` list.

    Steps:
      1. Read REF_DATA_PATH (columns 'problem' and 'answer').
      2. For each problem build a feature row: ENGINE.get_reasoning_vector(...)
         followed by the values of extract_logical_features(...).
      3. The regression target is the last digit of the answer (answer % 10).
      4. Fit one LGBMRegressor per split (5 splits, fewer if there are fewer
         than 5 rows) and log per-fold MAE / accuracy to W&B.

    Returns:
        None. If the reference file is missing or has fewer than two rows, a
        failure is logged to W&B and LGB_MODELS is left unchanged.

    Calling this again replaces the contents of LGB_MODELS instead of appending,
    so re-running the cell does not duplicate models in the ensemble.
    """
    if not os.path.exists(REF_DATA_PATH):
        wandb.log({"training_status": "failed", "reason": "reference_data_not_found"})
        return

    # Load practice problems
    df = pd.read_csv(REF_DATA_PATH)
    if len(df) < 2:
        # KFold needs at least two samples; min()/max() of an empty column is NaN.
        wandb.log({"training_status": "failed", "reason": "not_enough_reference_samples"})
        return

    wandb.config.update({
        "training_samples": len(df),
        "answer_min": int(df['answer'].min()),
        "answer_max": int(df['answer'].max())
    })

    # Extract features
    all_feats = []
    all_answers = []

    for _, row in df.iterrows():
        txt = row['problem']
        answer = int(row['answer']) % 10  # Ensure single digit

        # Combine: Transformer embeddings + Logical features
        vec = ENGINE.get_reasoning_vector(txt)
        log_f = extract_logical_features(txt)
        all_feats.append(np.concatenate([vec, list(log_f.values())]))
        all_answers.append(answer)

    X = np.array(all_feats)
    y = np.array(all_answers)

    wandb.config.update({
        "feature_dim": X.shape[1],
        "n_features": X.shape[1]
    })

    # K-Fold training. KFold raises if n_splits exceeds the number of samples,
    # so cap it for very small practice sets.
    n_splits = min(5, len(X))
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # NOTE(review): LightGBM's default min_child_samples is 20; with a practice
    # set smaller than that the trees may be unable to split - confirm the
    # dataset size against these hyper-parameters.
    fitted_models = []
    for fold_idx, (t_idx, v_idx) in enumerate(kf.split(X)):
        m = lgb.LGBMRegressor(
            n_estimators=50,
            learning_rate=0.1,
            verbose=-1,
            objective='regression',
            metric='mae'
        )
        m.fit(X[t_idx], y[t_idx], eval_set=[(X[v_idx], y[v_idx])])
        fitted_models.append(m)

        # Validation metrics on the held-out fold, after rounding to a digit.
        val_preds = m.predict(X[v_idx])
        val_preds = np.clip(np.round(val_preds), 0, 9).astype(int)  # Single digit
        val_mae = np.mean(np.abs(val_preds - y[v_idx]))
        val_accuracy = np.mean(val_preds == y[v_idx])

        wandb.log({
            f"fold_{fold_idx}_mae": val_mae,
            f"fold_{fold_idx}_accuracy": val_accuracy
        })

    # Replace in place so every reference to the LGB_MODELS list sees the new
    # ensemble and a second call does not stack models on top of the old ones.
    LGB_MODELS[:] = fitted_models

    wandb.log({
        "n_models": len(LGB_MODELS),
        "training_status": "completed"
    })

# ========== METHOD 5: PREDICTION WITH REASONING ==========
def predict_with_reasoning(problem_id, problem_text):
    """
    METHOD: Two-stage prediction with detailed reasoning

    Stage 1: try the symbolic solver (solve_with_reasoning).
    Stage 2: if it yields nothing, average the predictions of the models in
             LGB_MODELS and round/clip the result to a digit 0-9.
    Results of both stages are cached in MEMO, keyed by the MD5 of the text.

    Args:
        problem_id: Identifier of the problem. Not used in the computation; kept
            for the existing call signature.
        problem_text: Problem statement as a string.

    Returns:
        (answer, reasoning_string). If the symbolic solver fails and no models
        have been trained, the answer is 0 with a reasoning string that says so;
        that fallback is not cached.
    """
    # Check cache
    h = hashlib.md5(problem_text.encode()).hexdigest()
    if h in MEMO:
        cached = MEMO[h]
        return cached[0], cached[1]

    # Stage 1: Symbolic solving
    ans, method = solve_with_reasoning(problem_text)

    if ans is not None:
        reasoning = f"Symbolic solver ({method}): Direct mathematical computation"
        MEMO[h] = (ans, reasoning)
        return ans, reasoning

    # Without trained models np.mean([]) is a scalar NaN and indexing it raises,
    # which would take down the prediction callback. Return a valid default
    # instead, and do not cache it so a later call after training can improve it.
    if not LGB_MODELS:
        return 0, "Fallback: no trained models available, returning default answer 0"

    # Stage 2: ML ensemble
    vec = ENGINE.get_reasoning_vector(problem_text)
    log_f = list(extract_logical_features(problem_text).values())
    feat = np.concatenate([vec, log_f]).reshape(1, -1)

    # Ensemble prediction: mean over models, one row in -> one value out.
    preds = np.mean([m.predict(feat) for m in LGB_MODELS], axis=0)
    ans = int(np.clip(np.round(preds[0]), 0, 9))  # Single digit

    # Generate reasoning. log_f is positional: 0=mod flag, 1=geometry flag,
    # 2=algebra flag, 3=token count (order of extract_logical_features).
    domain = "unknown"
    if log_f[0] > 0:
        domain = "modulo"
    elif log_f[1] > 0:
        domain = "geometry"
    elif log_f[2] > 0:
        domain = "algebra"

    reasoning = f"ML ensemble: Learned pattern from practice set (domain={domain}, complexity={log_f[3]:.1f} tokens, raw_pred={preds[0]:.2f})"
    MEMO[h] = (ans, reasoning)

    return ans, reasoning

# ========== PREDICTION API (for inference server) ==========
def predict(id_series: pl.Series, prob_series: pl.Series) -> pl.DataFrame:
    """
    Callback handed to the inference server.

    Takes the first element of each series as the problem id and problem text,
    obtains an answer via predict_with_reasoning, records it in REASONING_LOG and
    W&B, and returns a one-row frame with columns 'id' and 'answer'.
    """
    pid, prob_text = id_series.item(0), prob_series.item(0)

    # Free Python garbage and cached CUDA memory before handling the problem.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    ans, reasoning = predict_with_reasoning(pid, prob_text)

    # Keep the explanation in memory and send the raw prediction to W&B.
    log_entry = {"id": pid, "answer": ans, "reasoning": reasoning}
    REASONING_LOG.append(log_entry)
    wandb.log({"prediction": ans, "problem_id": pid})

    result = {'id': [pid], 'answer': [ans]}
    return pl.DataFrame(result)

# ========== MAIN EXECUTION ==========
# ========== MAIN EXECUTION ==========
# Flow: train the ensemble, predict the local test file (if present) and write
# submission.csv / reasoning_log.csv, then hand `predict` to the competition
# inference server.
if __name__ == "__main__":
    print("=" * 80)
    print("AI MATHEMATICAL OLYMPIAD - OFFLINE LEARNING SYSTEM")
    print("=" * 80)
    
    # Step 1: Train from practice set
    print("\n[STEP 1] Training ensemble models from practice set...")
    train_ensemble()
    print(f"✅ Trained {len(LGB_MODELS)} models")
    
    # Step 2: Load test questions
    print("\n[STEP 2] Loading test questions...")
    if os.path.exists(TEST_DATA_PATH):
        test_df = pd.read_csv(TEST_DATA_PATH)
        print(f"✅ Loaded {len(test_df)} test questions")
        wandb.config.update({"test_samples": len(test_df)})
    else:
        print("⚠️ Test file not found, will use inference server")
        test_df = None
    
    # Step 3: Generate predictions for test set (if available)
    if test_df is not None:
        print("\n[STEP 3] Generating predictions for test questions...")
        predictions = []
        total = len(test_df)
        
        # Count processed rows with enumerate: the DataFrame index label is not
        # guaranteed to be a 0-based position.
        for done, (_, row) in enumerate(test_df.iterrows(), start=1):
            pid = row['id']
            prob_text = row['problem'] if 'problem' in row else str(row)
            
            ans, reasoning = predict_with_reasoning(pid, prob_text)
            predictions.append({
                'id': pid,
                'answer': ans,
                'reasoning': reasoning
            })
            
            if done % 10 == 0:
                print(f"  Processed {done}/{total} questions")
        
        # Step 4: Create submission.csv (LAST STEP)
        print("\n[STEP 4] Creating submission.csv...")
        submission_df = pd.DataFrame({
            'id': [p['id'] for p in predictions],
            'answer': [p['answer'] for p in predictions]
        })
        
        # Ensure all answers are single digits (0-9)
        submission_df['answer'] = submission_df['answer'].clip(0, 9).astype(int)
        
        # Save submission file
        submission_df.to_csv("submission.csv", index=False)
        print(f"✅ Created submission.csv with {len(submission_df)} predictions")
        print(f"   Answer range: {submission_df['answer'].min()} - {submission_df['answer'].max()}")
        
        # Save reasoning log
        reasoning_df = pd.DataFrame(predictions)
        reasoning_df.to_csv("reasoning_log.csv", index=False)
        print("✅ Saved reasoning log to reasoning_log.csv")
        
        # Log to WandB
        wandb.save("submission.csv")
        wandb.save("reasoning_log.csv")
        
        # Display sample predictions
        print("\n[STEP 5] Sample predictions:")
        print(submission_df.head(10).to_string(index=False))
    
    # Step 6: Start inference server (for competition evaluation)
    print("\n[STEP 6] Starting inference server for competition evaluation...")
    print("   Server will handle 50 questions per run (2 runs = 100 total)")
    
    server = kaggle_evaluation.aimo_3_inference_server.AIMO3InferenceServer(predict)
    wandb.log({"server_status": "started"})
    
    try:
        # serve() blocks waiting for the competition gateway, which only exists
        # during a scored rerun. In an interactive session it would wait until
        # manually interrupted, so drive `predict` through the local gateway
        # against the local test file instead.
        if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
            server.serve()
        elif test_df is not None:
            server.run_local_gateway((TEST_DATA_PATH,))
        else:
            print("⚠️ No local test file available, skipping local gateway run")
    except KeyboardInterrupt:
        wandb.log({"server_status": "stopped"})
    finally:
        # Finalize: record summary counters and close the W&B run even if the
        # server loop was interrupted.
        wandb.log({
            "memo_size": len(MEMO),
            "n_lgb_models": len(LGB_MODELS),
            "engine_ready": ENGINE.ready
        })
        
        wandb.finish()
        print("\n✅ WandB run completed (offline mode)")
        print(f"✅ Cached {len(MEMO)} solutions")
        print("✅ Submission file ready: submission.csv")

/kaggle/input/ai-mathematical-olympiad-progress-prize-3/reference.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/AIMO3_Reference_Problems.pdf
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/sample_submission.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/aimo_3_inference_server.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/aimo_3_gateway.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/__init__.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/core/templates.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/core/base_gateway.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/core/relay.py
/kaggle/input/ai-mathematical-olympiad-progress-prize-3/kaggle_evaluation/core/kaggle_evaluation.proto
/kaggle/input/ai-mathematical-olympiad-progre

<IPython.core.display.Javascript object>

KeyboardInterrupt: 