In [21]:
import torch
from transformers import AutoModelForSequenceClassification

# Export a Hugging Face sequence-classification checkpoint into a flat state
# dict whose keys follow the naming used by the Rust-side model
# (embedding / position_embedding / layer_norm / encoder_{i}.* / classifier).
HF_NAME = "indobenchmark/indobert-base-p2"  # or: "indobenchmark/indobert-lite-base-p2"
# NOTE(review): every source key read below starts with "bert."; confirm the
# lite checkpoint exposes the same key names before switching HF_NAME.

m = AutoModelForSequenceClassification.from_pretrained(HF_NAME, num_labels=2)
src = m.state_dict()
dst = {}

# ---- Embeddings & LayerNorm (matches if the Rust embedding_size = 768) ----
# NOTE(review): bert.embeddings.token_type_embeddings is not exported here --
# confirm the Rust model does not need it.
EMBEDDING_KEYS = {
    "embedding.weight":          "bert.embeddings.word_embeddings.weight",
    "position_embedding.weight": "bert.embeddings.position_embeddings.weight",
    "layer_norm.weight":         "bert.embeddings.LayerNorm.weight",
    "layer_norm.bias":           "bert.embeddings.LayerNorm.bias",
}
for dst_key, src_key in EMBEDDING_KEYS.items():
    dst[dst_key] = src[src_key].cpu()

# ---- Encoder layers ----
# Target sub-module name -> source sub-module name inside one encoder layer.
# The order is kept identical to the original hand-written assignments so the
# resulting dict (and therefore the serialized file) has the same key order.
LAYER_KEYS = {
    # self-attention q/k/v/out
    "self_attn.query":      "attention.self.query",
    "self_attn.key":        "attention.self.key",
    "self_attn.value":      "attention.self.value",
    "self_attn.out":        "attention.output.dense",
    # layer norms
    "norm1":                "attention.output.LayerNorm",
    "norm2":                "output.LayerNorm",
    # feed-forward
    "feed_forward.linear1": "intermediate.dense",
    "feed_forward.linear2": "output.dense",
}

# Take the depth from the loaded config instead of hard-coding 12, so a
# checkpoint with a different number of layers is exported completely.
for i in range(m.config.num_hidden_layers):
    for dst_name, src_name in LAYER_KEYS.items():
        for param in ("weight", "bias"):
            dst[f"encoder_{i}.{dst_name}.{param}"] = src[
                f"bert.encoder.layer.{i}.{src_name}.{param}"
            ].cpu()

# ---- classifier (this is what caused your error just now) ----
# NOTE: the load warning printed for this checkpoint reports classifier.weight
# and classifier.bias as newly initialized, so these are untrained values that
# only make the key set complete; fine-tune before using them for predictions.
dst["classifier.weight"] = src["classifier.weight"].cpu()
dst["classifier.bias"]   = src["classifier.bias"].cpu()

# Save with the legacy (non-zipfile) serializer so libtorch's load_partial can read it
torch.save(dst, "clean_state_dict.pt", _use_new_zipfile_serialization=False)

# (optional) also write a safetensors copy; this is best-effort, so any failure
# (including safetensors not being installed) is reported and skipped.
try:
    from safetensors.torch import save_file
    save_file(dst, "clean_state_dict.safetensors")
except Exception as e:
    print("Skip safetensors:", e)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
