CI / Pytest #12

Merged · 8 commits · Mar 5, 2024
40 changes: 40 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,40 @@
name: CI

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]

jobs:
format:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: run black
# make sure to keep this in sync with pyproject.toml
run: pipx run -- "black>=24.2.0,<25.0.0" --check .

test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11"]

steps:
- uses: actions/checkout@v2
- run: pipx install poetry
- name: install python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
- run: poetry install
- name: cache huggingface
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: ${{ runner.os }}-hf-models-${{ hashFiles('**/lockfiles') }}
restore-keys: |
${{ runner.os }}-hf-models-
- run: poetry run pytest
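
Note: the "cache huggingface" step restores ~/.cache/huggingface between runs so tests that download models don't re-fetch them every time. A minimal sketch (hypothetical snippet, not part of this diff) of the kind of test code that populates that cache:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Downloading a small model such as GPT-2 writes it under ~/.cache/huggingface,
# which the actions/cache step above restores on subsequent CI runs.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
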
2 changes: 2 additions & 0 deletions CHANGELOG
@@ -2,6 +2,8 @@

## Unreleased

* Add GPT-2 support. (#12)

## 0.2.0 - 2024-03-03

* Add control vector arithmetic. (#6)
143 changes: 142 additions & 1 deletion poetry.lock

Some generated files are not rendered by default.

6 changes: 6 additions & 0 deletions pyproject.toml
@@ -15,6 +15,12 @@ accelerate = "^0.26.1"
tqdm = "^4.66.1"
gguf = "^0.6.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.2"
black = "^24.2.0" # make sure to keep this in sync with .github/workflows/ci.yml

[tool.pytest.ini_options]
python_files = ["tests.py"]

[build-system]
requires = ["poetry-core"]
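
Note: the python_files = ["tests.py"] setting restricts pytest collection to files named tests.py. A minimal sketch of a test such a file could contain (hypothetical test, not from this diff):

# tests.py -- collected by pytest because of the python_files setting above
from repeng.control import ControlModel  # importing the package doubles as a smoke test


def test_can_reference_control_model():
    assert ControlModel is not None
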
33 changes: 24 additions & 9 deletions repeng/control.py
@@ -26,13 +26,13 @@ def __init__(self, model: PreTrainedModel, layer_ids: typing.Iterable[int]):

super().__init__()
self.model = model
self.layer_ids = [
i if i >= 0 else len(model.model.layers) + i for i in layer_ids
]

layers = model_layer_list(model)
self.layer_ids = [i if i >= 0 else len(layers) + i for i in layer_ids]
for layer_id in layer_ids:
layer: torch.nn.Module = self.model.model.layers[layer_id] # type: ignore
layer = layers[layer_id]
if not isinstance(layer, ControlModule):
self.model.model.layers[layer_id] = ControlModule(layer)
layers[layer_id] = ControlModule(layer)
else:
warnings.warn(
"Trying to rewrap a wrapped model! Probably not what you want! Try calling .unwrap first."
@@ -52,12 +52,14 @@ def unwrap(self) -> PreTrainedModel:
After using this method, `set_control` and `reset` will not work.
"""

layers = model_layer_list(self.model)
for layer_id in self.layer_ids:
layer = self.model.model.layers[layer_id]
self.model.model.layers[layer_id] = layer.block
layers[layer_id] = layers[layer_id].block
return self.model

def set_control(self, control: "ControlVector", coeff: float = 1.0, **kwargs) -> None:
def set_control(
self, control: "ControlVector", coeff: float = 1.0, **kwargs
) -> None:
"""
Set a `ControlVector` for the layers this ControlModel handles, with a strength given
by `coeff`. (Negative `coeff` values invert the control vector, e.g. happiness→sadness.)
@@ -103,8 +105,9 @@ def set_raw_control(
(default: +)
"""

layers = model_layer_list(self.model)
for layer_id in self.layer_ids:
layer: ControlModule = self.model.model.layers[layer_id] # type: ignore
layer: ControlModule = layers[layer_id] # type: ignore
if control is None:
layer.reset()
else:
@@ -194,3 +197,15 @@ def forward(self, *args, **kwargs):
output = modified

return output


def model_layer_list(model: ControlModel | PreTrainedModel) -> torch.nn.ModuleList:
if isinstance(model, ControlModel):
model = model.model

if hasattr(model, "model"): # mistral-like
return model.model.layers
elif hasattr(model, "transformer"): # gpt-2-like
return model.transformer.h
else:
raise ValueError(f"don't know how to get layer list for {type(model)}")
7 changes: 2 additions & 5 deletions repeng/extract.py
@@ -9,7 +9,7 @@
from transformers import PreTrainedModel, PreTrainedTokenizerBase
import tqdm

from .control import ControlModel
from .control import ControlModel, model_layer_list


@dataclasses.dataclass
@@ -130,10 +130,7 @@ def read_representations(
hidden_layers = range(-1, -model.config.num_hidden_layers, -1)

# normalize the layer indexes if they're negative
if isinstance(model, ControlModel):
n_layers = len(model.model.model.layers)
else:
n_layers = len(model.model.layers)
n_layers = len(model_layer_list(model))
hidden_layers = [i if i >= 0 else n_layers + i for i in hidden_layers]

# the order is [positive, negative, positive, negative, ...]
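
Note: a worked example of the index normalization above, assuming a hypothetical 12-layer model:

n_layers = 12  # e.g. len(model_layer_list(model)) for GPT-2
hidden_layers = [-1, -2, -3]
normalized = [i if i >= 0 else n_layers + i for i in hidden_layers]
print(normalized)  # [11, 10, 9]
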