## MalConv2 Model Execution
This section demonstrates how to load and run the MalConv2 model using the files in the `MalConv2-main` directory. The code follows the instructions in that directory's README.

In [None]:
# NOTE(review): looks like a leftover scratch / kernel sanity-check cell; it only
# prints the literal 3 and nothing else in the notebook depends on it.
print(3)

In [None]:
import sys
import os
import torch
import numpy as np
import torch.nn.functional as F

# Locate the MalConv2-main checkout. The notebook may be launched from different
# working directories, so several candidate locations are probed in order and
# the first existing directory wins.
current_dir = os.getcwd()
possible_paths = [
    os.path.join(current_dir, 'MalConv2-main'),            # working directory is the root that contains MalConv2-main
    os.path.join(current_dir, '../models/MalConv2-main'),  # working directory is this notebook's folder (test/)
    # NOTE(review): machine-specific absolute fallback, kept only for backward
    # compatibility. Prefer setting the MALCONV2_PATH environment variable.
    '/Users/wjm/Desktop/2026 프로젝트/Binary-Hunter/models/MalConv2-main',
]

# Optional override: an explicitly configured location is tried before the guesses.
malconv_override = os.environ.get('MALCONV2_PATH')
if malconv_override:
    possible_paths.insert(0, malconv_override)

# First candidate that is an existing directory, as an absolute path (None if none match).
malconv_path = next(
    (os.path.abspath(candidate) for candidate in possible_paths if os.path.isdir(candidate)),
    None,
)

if malconv_path:
    if malconv_path not in sys.path:
        sys.path.append(malconv_path)
    print(f"MalConv2-main path added: {malconv_path}")
else:
    print("Error: Could not find MalConv2-main directory.")

# Make the parent of the working directory importable so that `from src import ...`
# (used for preprocess_pe_file) resolves.
src_path = os.path.abspath(os.path.join(current_dir, '../'))
if src_path not in sys.path:
    sys.path.append(src_path)

try:
    # Import the model class and the preprocessing helper.
    from MalConvGCT_nocat import MalConvGCT
    from src import preprocess_pe_file
    print("Successfully imported MalConvGCT and preprocess_pe_file")

    # Model hyper-parameters (taken from the MalConv2 README, per the original note).
    channels = 256
    window_size = 256
    stride = 64
    embd_size = 8

    print("Initializing model...")
    model = MalConvGCT(out_size=2, channels=channels, window_size=window_size, stride=stride, embd_size=embd_size)

    # Load the pretrained checkpoint that sits next to the model code.
    checkpoint_path = os.path.join(malconv_path, 'malconvGCT_nocat.checkpoint')

    if os.path.exists(checkpoint_path):
        print(f"Loading checkpoint from {checkpoint_path}...")
        # map_location='cpu' keeps loading portable (e.g. Apple Silicon / no CUDA).
        # NOTE(review): torch.load unpickles the file; only load checkpoints that
        # come from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))

        if 'model_state_dict' in checkpoint:
            # strict=False tolerates key mismatches, so inspect the returned report
            # instead of assuming every parameter was actually restored.
            load_report = model.load_state_dict(checkpoint['model_state_dict'], strict=False)
            print("Model weights loaded successfully.")
            if load_report.missing_keys:
                print(f"Warning: parameters missing from checkpoint (left at initial values): {load_report.missing_keys}")
            if load_report.unexpected_keys:
                print(f"Warning: checkpoint entries not used by the model: {load_report.unexpected_keys}")
        else:
            print(f"Warning: 'model_state_dict' not found. Keys: {checkpoint.keys()}")
    else:
        print(f"Warning: Checkpoint file not found at {checkpoint_path}. Running with random weights.")

    # Switch the model to evaluation mode before inference.
    model.eval()

    # Preprocess a real file and run it through the model.
    target_file = 'ZoomInstaller.exe'

    # Only proceed when the sample is present in the working directory.
    if os.path.exists(target_file):
        print(f"Processing file: {target_file}")

        # Build the model input with the project's preprocessing helper (mode='default').
        input_tensor = preprocess_pe_file(target_file, mode='default')

        print(f"Running inference on input with shape {input_tensor.shape}...")

        with torch.no_grad():
            output = model(input_tensor)
            # MalConvGCT returns: (logits, penult, post_conv)
            logits = output[0]
            probabilities = F.softmax(logits, dim=1)

        print("\nExecution Complete!")
        print(f"Logits: {logits}")

        # Interpret the result; each probability is extracted once and reused.
        benign_prob = probabilities[0][0].item()
        malware_prob = probabilities[0][1].item()

        print("\n[결과 해석]")
        print(f"1. Logits (Raw Score): {logits.tolist()}")
        print(f"2. Probabilities (Softmax): {probabilities.tolist()}")
        print(f"   - Class 0 (Benign/정상): {benign_prob:.4f} ({benign_prob*100:.2f}%)")
        print(f"   - Class 1 (Malware/악성): {malware_prob:.4f} ({malware_prob*100:.2f}%)")

        predicted_class = torch.argmax(probabilities, dim=1).item()
        class_label = "악성 (Malware)" if predicted_class == 1 else "정상 (Benign)"
        print(f"3. 최종 예측: {class_label} (Class {predicted_class})")

    else:
        print(f"Error: Target file '{target_file}' not found. Please make sure the file exists in the current directory.")

except ImportError as e:
    print(f"Import Error: {e}")
    print("Make sure MalConv2-main is in the path and src package is accessible.")
except Exception as e:
    # Keep the notebook running, but show the full traceback for diagnosis.
    print(f"An error occurred: {e}")
    import traceback
    traceback.print_exc()

MalConv2-main path added: /Users/wjm/Desktop/2026 프로젝트/Binary-Hunter/models/MalConv2-main
Successfully imported MalConvGCT and preprocess_pe_file
Initializing model...
Loading checkpoint from /Users/wjm/Desktop/2026 프로젝트/Binary-Hunter/models/MalConv2-main/malconvGCT_nocat.checkpoint...
Model weights loaded successfully.
Processing file: ZoomInstaller.exe
Running inference on input with shape torch.Size([1, 4000000])...

Execution Complete!
Logits: tensor([[ 2.7693, -5.6580]])

[결과 해석]
1. Logits (Raw Score): [[2.769256591796875, -5.658012866973877]]
2. Probabilities (Softmax): [[0.9997811913490295, 0.0002187703939853236]]
   - Class 0 (Benign/정상): 0.9998 (99.98%)
   - Class 1 (Malware/악성): 0.0002 (0.02%)
3. 최종 예측: 정상 (Benign) (Class 0)


In [None]:
# Config loading smoke test: read the project configuration through
# src.utils.load_config and echo it, reporting (not raising) any failure.
from src.utils import load_config

try:
    config = load_config()
    print("Config loaded successfully:")
    print(config)
except Exception as config_error:
    # Report the problem without aborting the rest of the notebook.
    print(f"Failed to load config: {config_error}")

Config loaded successfully:
{'model': {'malconv': {'channels': 256, 'window_size': 256, 'stride': 64, 'embd_size': 8, 'num_classes': 2}}, 'attack': {'padding_ratio': 0.01, 'iterations': 20}, 'explainability': {'deep_shap': {'baseline': 'zero'}}}


In [None]:
# Deep SHAP explainability check: attribute the malware-class (class 1) output
# to positions of the input and inspect the strongest contributions.
try:
    from src import compute_deep_shap
    import matplotlib.pyplot as plt

    # This cell reuses `model` and `input_tensor` created by the inference cell.
    # They are only defined when that cell imported the model and found the
    # target file, so fail with an actionable message instead of a bare NameError.
    missing_inputs = [name for name in ('model', 'input_tensor') if name not in globals()]
    if missing_inputs:
        raise RuntimeError(
            f"Missing prerequisite variable(s): {', '.join(missing_inputs)}. "
            "Run the model inference cell successfully before this cell."
        )

    print("Computing Deep SHAP values...")
    # Contributions are computed with respect to target_class=1 (malware).
    shap_values = compute_deep_shap(model, input_tensor, target_class=1)
    # NOTE(review): the code below presumably expects a 1-D NumPy array with one
    # value per input position — TODO confirm against compute_deep_shap.

    print(f"SHAP values calculated. Shape: {shap_values.shape}")
    print(f"Max contribution: {np.max(shap_values)}")
    print(f"Min contribution: {np.min(shap_values)}")

    # Visualise only the first 1000 positions as an example.
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.plot(shap_values[:1000])
    ax.set_title("Deep SHAP Values (First 1000 bytes)")
    ax.set_xlabel("Byte Index")
    ax.set_ylabel("Contribution to Malware Class")
    plt.show()

    # Indices of the 10 largest contributions, in descending order of value.
    top_indices = np.argsort(shap_values)[-10:][::-1]
    print(f"Top 10 contributing byte indices: {top_indices}")
    print(f"Top 10 values: {shap_values[top_indices]}")

except Exception as e:
    # Keep the notebook running, but show the full traceback for diagnosis.
    print(f"Error during SHAP computation: {e}")
    import traceback
    traceback.print_exc()