# Run Full TSB-CL on Kaggle (Using GitHub Code)

This notebook is designed to run on Kaggle. It performs the following steps:
1.  **Clones the GitHub repository** containing the project code.
2.  **Installs the system dependency** (`libsparsehash-dev`) via `apt-get`.
3.  **Compiles the MSBE** (Maximal Similar Biclique Enumeration) C++ tool.
4.  **Runs the training script** by importing the code directly from the cloned repository.

This ensures you are always running the latest version of the code without manually copying large blocks of text.

In [None]:
# 1. Clone Repository
# Delete any checkout left over from a previous run of this cell, so the
# `git clone` below starts from an empty target and fetches the current
# state of the repository.
!rm -rf ai_project 
!git clone https://github.com/yangzeha/ai_project.git

# 2. Install Missing Dependencies
# Refresh the apt package index, then install the sparsehash development
# package non-interactively (-y).
# NOTE(review): presumably required to build the MSBE C++ sources below --
# confirm against the includes in main.cpp.
!apt-get update && apt-get install -y libsparsehash-dev

# 3. Compile MSBE (C++ Tool)
# Builds the `msbe` executable from main.cpp inside the cloned repository.
# Any failure here aborts the cell: the later training step is pointed at
# this binary, so continuing without it would only fail later and less clearly.
import os
import shutil
import subprocess
import sys

# Define paths (absolute, so they stay valid if the working directory changes)
PROJECT_DIR = os.path.abspath("ai_project")
MSBE_DIR = os.path.join(PROJECT_DIR, "Similar-Biclique-Idx-main")
MSBE_EXE_PATH = os.path.join(MSBE_DIR, "msbe")

# Fail fast when the sources are missing (e.g. the clone step did not succeed).
if not os.path.isdir(MSBE_DIR):
    print(f"‚ùå Error: Directory {MSBE_DIR} not found.")
    raise FileNotFoundError(f"MSBE source directory not found: {MSBE_DIR}")

print(f"Compiling MSBE in {MSBE_DIR}...")

# Clean up any stale build output so g++ can write a fresh executable.
# A real directory named `msbe` is removed recursively; a regular file or a
# symlink (including a dangling one) is simply unlinked.
if os.path.lexists(MSBE_EXE_PATH):
    if os.path.isdir(MSBE_EXE_PATH) and not os.path.islink(MSBE_EXE_PATH):
        shutil.rmtree(MSBE_EXE_PATH)
    else:
        os.remove(MSBE_EXE_PATH)

# Compile with optimizations; -D_PrintResults_ defines that preprocessor
# macro for the build.
cmd = ["g++", "-O3", "main.cpp", "-o", "msbe", "-D_PrintResults_"]
try:
    subprocess.run(cmd, cwd=MSBE_DIR, check=True)
    subprocess.run(["chmod", "+x", "msbe"], cwd=MSBE_DIR, check=True)
except subprocess.CalledProcessError as e:
    print(f"‚ùå Compilation failed: {e}")
    # Re-raise so the notebook stops here instead of training without MSBE.
    raise
else:
    print("‚úÖ Compilation successful!")

# 4. Run Training
# Add project root to system path so the cloned package can be imported
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

# Import the model class and the training helpers from the cloned repository.
try:
    from TSB_CL_Project.solo_model.model_variants import FullTSBCL
    from TSB_CL_Project.solo_model import train_utils
except ImportError as e:
    print(f"‚ùå Import failed: {e}")
    # Re-raise: everything below needs `train_utils` and `FullTSBCL`, so
    # continuing would only surface as a confusing NameError later on.
    raise
else:
    print("‚úÖ Modules imported successfully.")

# Configure parameters and override paths on the train_utils module.
# The table keeps all overrides in one place; they are applied in the order
# listed.
overrides = {
    "NUM_SNAPSHOTS": 5,
    "BATCH_SIZE": 2048,
    "MSBE_EXE": MSBE_EXE_PATH,
    "DATA_PATH": os.path.join(MSBE_DIR, "datasets", "bi_github.txt"),
}
for option, setting in overrides.items():
    setattr(train_utils, option, setting)

print(f"Data Path: {train_utils.DATA_PATH}")
print(f"MSBE Path: {train_utils.MSBE_EXE}")

# Run training with the full model variant.
print("\nüöÄ Starting Training for Full TSB-CL (Fixed Architecture)...")
training_args = dict(
    model_class=FullTSBCL,
    model_name="Full TSB-CL (GitHub Version)",
    model_type="full",
    epochs=50,  # Ensure 50 epochs
    tau=2,
    epsilon=0.1,
)
metrics = train_utils.run_training(**training_args)

print("\nTraining Complete!")
# Report the last recorded value of each metric, but only when the recall
# history is non-empty.
if metrics['recall']:
    for label, key in (("Final Recall:", 'recall'), ("Final NDCG:", 'ndcg')):
        print(label, metrics[key][-1])

# Check output: look for the checkpoint file in the current working directory.
checkpoint_found = os.path.exists("full_best.pth")
print(
    "‚úÖ Model saved to: full_best.pth"
    if checkpoint_found
    else "‚ùå Model file not found."
)