# 在 Kaggle 上运行 MSBEGCL（基于 SELFRec 框架）

本 notebook 会：
- 克隆仓库 https://github.com/yangzeha/MSBEGCL.git
- 安装所需的依赖（调整 faiss 到 faiss-cpu 以便在 Kaggle 上安装）
- 进入仓库（若存在 `SELFRec` 子目录则进入该子目录）并运行 `MSBEGCL` 模型（非交互式，模型名称通过标准输入自动传入）

使用方法：将此 notebook 上传到 Kaggle，运行全部单元格。运行时间与 Kaggle 资源有关。

In [None]:
import os, sys, subprocess, time

# Core configuration (adapted to the MSBEGCL repository).
repo_dir = 'MSBEGCL'  # directory name created by `git clone` (same as the repo name)
model_name = 'MSBEGCL'  # model name that is later fed to main.py


def _enclosing_repo_root(path, name):
    """Return the nearest ancestor of *path* (inclusive) that is a checkout named *name*.

    A directory qualifies when its basename equals *name* and it contains a
    ``.git`` entry. Returns ``None`` when no such ancestor exists.
    """
    current = os.path.abspath(path)
    while True:
        if (os.path.basename(current) == name
                and os.path.exists(os.path.join(current, '.git'))):
            return current
        parent = os.path.dirname(current)
        if parent == current:  # reached the filesystem root
            return None
        current = parent


# 1) Locate or clone the repository.
# This cell changes the working directory, so when it is re-run the cwd is
# already inside the checkout (e.g. MSBEGCL/SELFRec). A plain relative
# existence check would then clone the repo again inside itself, so first
# look for an enclosing checkout and go back to its root.
_repo_root = _enclosing_repo_root(os.getcwd(), repo_dir)
if _repo_root is not None:
    print(f'{repo_dir} already exists')
    os.chdir(_repo_root)
else:
    if not os.path.exists(repo_dir):
        print('Cloning MSBEGCL repository...')
        subprocess.run(['git', 'clone', 'https://github.com/yangzeha/MSBEGCL.git'], check=True)
    else:
        print(f'{repo_dir} already exists')
    # 2) Enter the repository directory.
    os.chdir(repo_dir)

# Enter the SELFRec subdirectory when present: the project layout is
# MSBEGCL/SELFRec/main.py.
if os.path.isdir('SELFRec'):
    print('Entering subdirectory: SELFRec (detected project structure)')
    os.chdir('SELFRec')

print('Current dir:', os.getcwd())
# List the files in the current directory to help with debugging.
print('Files in current dir:', os.listdir('.'))

# 3) Show requirements.txt so the pinned versions can be checked by eye.
# The file is opened through a context manager so the handle is always
# closed, and a missing file is detected by attempting the open rather than
# by a separate existence check.
try:
    with open('requirements.txt', 'r', encoding='utf-8') as req_file:
        requirements_text = req_file.read()
except FileNotFoundError:
    print('\nWarning: requirements.txt not found in current directory.')
except (OSError, UnicodeError) as e:
    # Unreadable or undecodable file: report it and keep going, since this
    # step is purely informational.
    print('Failed to read requirements.txt:', e)
else:
    print('\n--- requirements.txt ---')
    print(requirements_text)

# 4) Install dependencies (same adapted package set as before).
print('\nStart installing dependencies...')
pip_install = [sys.executable, '-m', 'pip', 'install']
base_steps = (
    ['--upgrade', 'pip', '-q'],
    ['PyYAML==6.0.2', 'scipy==1.14.1', '-q'],
)
for step_args in base_steps:
    install_cmd = pip_install + step_args
    print('Running:', ' '.join(install_cmd))
    try:
        subprocess.run(install_cmd, check=True)
    except subprocess.CalledProcessError as err:
        # A failed step is only reported; the remaining steps still run.
        print('Command failed (warning):', err)

# faiss-cpu is installed on its own: try the pinned version first, then fall
# back to whatever version pip resolves. Neither failure is fatal.
print('Installing faiss-cpu...')
try:
    subprocess.run(pip_install + ['faiss-cpu==1.13.1', '-q'], check=True)
except subprocess.CalledProcessError:
    print('faiss-cpu==1.13.1 failed, attempting to install latest faiss-cpu (non-fatal)')
    try:
        subprocess.run(pip_install + ['faiss-cpu', '-q'], check=True)
    except subprocess.CalledProcessError as fallback_err:
        print('faiss-cpu install failed (continuing):', fallback_err)
print('Dependency installation step finished.')

# 5) Report the torch installation: its version and whether CUDA is usable.
# Any failure (missing package, broken install) is printed, not raised.
try:
    import torch
    torch_version = torch.__version__
    print(f'torch version: {torch_version}')
    cuda_ok = torch.cuda.is_available()
    print(f'cuda available: {cuda_ok}')
except Exception as import_err:
    print(f'torch not available or import failed: {import_err}')

# 6) Run the model through main.py and stream its output live.
print(f'\nStart running {model_name} via main.py (streaming output)...')
start = time.time()

try:
    process = subprocess.Popen(
        [sys.executable, '-u', 'main.py'],  # -u: unbuffered child output
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr so tracebacks appear in order
        text=True,
        errors='replace',  # an undecodable byte must not abort the streaming
        bufsize=1,
    )
    try:
        # Write the model name followed by a newline to the child's stdin,
        # then close stdin so the child sees EOF on any further read.
        try:
            process.stdin.write(f'{model_name}\n')
            process.stdin.flush()
            process.stdin.close()
        except (OSError, ValueError) as e:
            print(f"Error writing to stdin: {e}")

        # Stream the output line by line until the child closes its stdout.
        print("\n--- Model Output Start ---\n")
        for line in process.stdout:
            # Drop only the trailing newline: leading whitespace carries the
            # indentation of tracebacks and formatted tables.
            print(line.rstrip('\n'))

        rc = process.wait()
    finally:
        # If streaming was cut short (error or notebook interrupt), do not
        # leave the child process running in the background.
        if process.poll() is None:
            process.kill()
            process.wait()
        process.stdout.close()

    print(f"\n--- Model Output End (Exit Code: {rc}) ---")

    if rc != 0:
        print(f"{model_name} execution failed.")

except Exception as e:
    # Top-level boundary of the cell: report the failure instead of raising.
    print('Running main.py failed with exception:', e)

print(f'\nRun finished. Total time: {time.time() - start:.2f}s')