In [1]:
%load_ext autoreload
%autoreload 2
import sys
import os
# Add the project root (toy-wm directory) to sys.path so we can import src as a package
# This allows relative imports within the package to work correctly
# Assumes notebook is run from the project root directory
project_root = os.getcwd()  # or use: os.path.dirname(os.path.abspath(''))
print(project_root)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

/share/u/wendler/code/toy-wm


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch as t

# Import using absolute path from src package
from src.utils.checkpoint import load_model_from_config

#model = load_model_from_config("configs/small_6frame.yaml")
model = load_model_from_config("configs/bigger_30frame_causal.yaml", strict=False)
model.eval()

loading configs/bigger_30frame_causal.yaml


CausalDit(
  (rope_seq): RoPE()
  (blocks): ModuleList(
    (0-7): 8 x CausalBlock(
      (norm1): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
      (selfattn): AttentionEinOps(
        (rope): RoPE()
        (ln1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (ln2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
      )
      (norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
      (geglu): GEGLU(
        (up_proj): Linear(in_features=384, out_features=1536, bias=True)
        (up_gate): Linear(in_features=384, out_features=1536, bias=True)
        (down): Linear(in_features=1536, out_features=384, bias=True)
        (nonlin): SiLU()
      )
      (modulation): Sequential(
        (0): SiLU()
        (1): Linear(in_features=384, out_features=2304, bias=True)
      )
    )
  )
  (patch): Patch(
    (init_conv_seq): Sequential(
      (0): Conv2d(3, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (1): SiLU()
      (2): GroupNorm(3

In [3]:
model.activate_caching(1, 300)

torch.Size([1, 19500, 1, 32]) torch.Size([1, 19500, 1, 32])
updating rope for block 0
torch.Size([1, 1950, 1, 32]) torch.Size([1, 1950, 1, 32])
torch.Size([1, 19500, 1, 32]) torch.Size([1, 19500, 1, 32])
updating rope for block 1
torch.Size([1, 1950, 1, 32]) torch.Size([1, 1950, 1, 32])
torch.Size([1, 19500, 1, 32]) torch.Size([1, 19500, 1, 32])
updating rope for block 2
torch.Size([1, 1950, 1, 32]) torch.Size([1, 1950, 1, 32])
torch.Size([1, 19500, 1, 32]) torch.Size([1, 19500, 1, 32])
updating rope for block 3
torch.Size([1, 1950, 1, 32]) torch.Size([1, 1950, 1, 32])
torch.Size([1, 19500, 1, 32]) torch.Size([1, 19500, 1, 32])
updating rope for block 4
torch.Size([1, 1950, 1, 32]) torch.Size([1, 1950, 1, 32])
torch.Size([1, 19500, 1, 32]) torch.Size([1, 19500, 1, 32])
updating rope for block 5
torch.Size([1, 1950, 1, 32]) torch.Size([1, 1950, 1, 32])
torch.Size([1, 19500, 1, 32]) torch.Size([1, 19500, 1, 32])
updating rope for block 6
torch.Size([1, 1950, 1, 32]) torch.Size([1, 1950, 

In [4]:
from src.datasets.pong1m import get_loader

loader, pred2frame = get_loader(duration=1, fps=30, mode='-1,1')
frames, actions = next(iter(loader))
frames = pred2frame(frames)

520 batches


In [None]:
from src.trainers.diffusion_forcing import sample

def render_video(actions, n_steps=8, cfg=0, negative_actions=None, clamp=True):
    device = model.device
    frames = t.randn(1, actions.shape[1], 3, 24, 24, device="cpu")
    for aidx in range(actions.shape[1]):
        print(aidx)
        noise = t.randn(1, 1, 3, 24, 24, device=device)
        z = sample(model, noise, actions[:, aidx:aidx+1], num_steps=n_steps, cfg=cfg, negative_actions=negative_actions)
        model.cache.update_global_location(1)
        frames[:, aidx:aidx+1] = z.detach().cpu()
        if clamp:
            frames = frames.clamp(-1, 1)
    return frames

In [6]:
model = t.compile(model)

In [7]:
actions = t.tensor([[0]+119*[2]], dtype=t.int32, device=model.device)
negative_actions = t.tensor([[0]+29*[2]], dtype=t.int32, device=model.device)
z = render_video(actions, n_steps=4, cfg=0, negative_actions=None, clamp=True)
frames = pred2frame(z)

import matplotlib.pyplot as plt
import matplotlib.animation as animation

# z[0] shape: (num_frames, height, width, 3), where num_frames = frames.shape[1]
frames_np = frames[0].permute(0, 2, 3, 1).cpu().numpy()  # shape: (num_frames, height, width, 3)

fig, ax = plt.subplots()
im = ax.imshow(frames_np[0])
ax.set_title('Frame 0')

def animate(i):
    im.set_data(frames_np[i])
    ax.set_title(f'Frame {i}')
    return [im]

ani = animation.FuncAnimation(
    fig, animate, frames=frames_np.shape[0],
    interval=200, blit=True, repeat=True
)

plt.show()

# Optional: To save the gif uncomment below (requires pillow, install with `pip install pillow`)
from collections import Counter
# Majority vote over values in actions for the filename
if actions is not None:
    # Flatten actions tensor and convert to list
    action_vals = actions.flatten().tolist()
    majority_val = Counter(action_vals).most_common(1)[0][0]
    ani.save(f"sample_{majority_val}.gif", writer="pillow")
else:
    ani.save("sample_unknown.gif", writer="pillow")




0


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0
RoPE forward torch.Size([1, 65, 12, 32]) 0


 25%|██▌       | 1/4 [00:41<02:03, 41.30s/it]W1105 16:16:16.429000 105160 .venv/lib/python3.13/site-packages/torch/_dynamo/convert_frame.py:1358] [4/8] torch._dynamo hit config.recompile_limit (8)
W1105 16:16:16.429000 105160 .venv/lib/python3.13/site-packages/torch/_dynamo/convert_frame.py:1358] [4/8]    function: 'forward' (/share/u/wendler/code/toy-wm/src/models/dit_dforce.py:39)
W1105 16:16:16.429000 105160 .venv/lib/python3.13/site-packages/torch/_dynamo/convert_frame.py:1358] [4/8]    last reason: 4/7: self.layer_idx == 7                                      # assert layer_idx == self.curr_layer, f"layer idx should be the same as our internal counter but we got {layer_idx} and internal is {self.curr_layer}."  # src/nn/attn.py:31 in get (HINT: torch.compile considers integer attributes of the nn.Module to be static. If you are observing recompilation, you might want to make this integer dynamic using torch._dynamo.config.allow_unspec_int_on_nn_module = True, or convert this intege

RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 65
RoPE forward torch.Size([1, 65, 12, 32]) 130
RoPE forward torch.Size([1, 65, 12, 32]) 130
RoPE forward torch.Size([1, 65, 12, 32]) 130
RoPE forward torch.Size([1, 65, 12, 32]) 130
RoPE forward torch.Size([1, 65, 12, 32]) 130
RoPE forward torch.Size([1, 65, 12, 32]) 130
RoPE forward torch.Size([1

  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 260
RoPE forward torch.Size([1, 65, 12, 32]) 325
RoPE forward torch.Size([1, 65, 12, 32]) 325
RoPE forward torch.Size([1, 65, 12, 32]) 325
RoPE forward torch.Size([1, 65, 12, 32]) 325
RoPE forward torch.Size([1, 65, 12, 32]) 325
RoPE forward torch.Size([1, 65, 12, 32]) 325
RoPE forwa

100%|██████████| 4/4 [00:00<00:00, 25.92it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 325
RoPE forward torch.Size([1, 65, 12, 32]) 325
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 390
RoPE forward torch.Size([1, 65, 12, 32]) 455
RoPE forward torch.Size([1, 65, 12, 32]) 455
RoPE forward torch.Size([1, 65, 12, 32]) 455
RoPE forward torch.Size([1, 65, 12, 32]) 455
RoPE forwa

  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 520
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585


 75%|███████▌  | 3/4 [00:00<00:00, 26.43it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 585
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forward torch.Size([1, 65, 12, 32]) 650
RoPE forwa

100%|██████████| 4/4 [00:00<00:00, 25.07it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
RoPE forward torch.Size([1, 65, 12, 32]) 715
3


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 780
RoPE forward torch.Size([1, 65, 12, 32]) 845
RoPE forward torch.Size([1, 65, 12, 32]) 845
RoPE forward torch.Size([1, 65, 12, 32]) 845
RoPE forward torch.Size([1, 65, 12, 32]) 845
RoPE forward torch.Size([1, 65, 12, 32]) 845
RoPE forward torch.Size([1, 65, 12, 32]) 845
RoPE forwa

 75%|███████▌  | 3/4 [00:00<00:00, 23.87it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 910
RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forwa

100%|██████████| 4/4 [00:00<00:00, 23.93it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 975
RoPE forward torch.Size([1, 65, 12, 32]) 975
4


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1040
RoPE forward torch.Size([1, 65, 12, 32]) 1105
RoPE forward torch.Size([1, 65, 12, 32]) 1105
RoPE forward torch.Size([1, 65, 12, 32]) 1105
RoPE forward torch.Size([1, 65, 12, 32]) 1105
RoPE forward torch.Size([1, 65, 12, 32]) 1105
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 23.53it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
RoPE forward torch.Size([1, 65, 12, 32]) 1235
5


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1300
RoPE forward torch.Size([1, 65, 12, 32]) 1365
RoPE forward torch.Size([1, 65, 12, 32]) 1365
RoPE forward torch.Size([1, 65, 12, 32]) 1365
RoPE forward torch.Size([1, 65, 12, 32]) 1365
RoPE forward torch.Size([1, 65, 12, 32]) 1365
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 23.90it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 1430
RoPE forward torch.Size([1, 65, 12, 32]) 1430
RoPE forward torch.Size([1, 65, 12, 32]) 1430
RoPE forward torch.Size([1, 65, 12, 32]) 1430
RoPE forward torch.Size([1, 65, 12, 32]) 1430
RoPE forward torch.Size([1, 65, 12, 32]) 1430
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12, 32]) 1495
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 24.48it/s]


6


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1560
RoPE forward torch.Size([1, 65, 12, 32]) 1625
RoPE forward torch.Size([1, 65, 12, 32]) 1625
RoPE forward torch.Size([1, 65, 12, 32]) 1625
RoPE forward torch.Size([1, 65, 12, 32]) 1625
RoPE forward torch.Size([1, 65, 12, 32]) 1625
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 26.88it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 1690
RoPE forward torch.Size([1, 65, 12, 32]) 1690
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
RoPE forward torch.Size([1, 65, 12, 32]) 1755
7


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1820
RoPE forward torch.Size([1, 65, 12, 32]) 1885
RoPE forward torch.Size([1, 65, 12, 32]) 1885
RoPE forward torch.Size([1, 65, 12, 32]) 1885
RoPE forward torch.Size([1, 65, 12, 32]) 1885
RoPE forward torch.Size([1, 65, 12, 32]) 1885
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 24.69it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 1950
RoPE forward torch.Size([1, 65, 12, 32]) 1950
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015


100%|██████████| 4/4 [00:00<00:00, 23.32it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
RoPE forward torch.Size([1, 65, 12, 32]) 2015
8


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2080
RoPE forward torch.Size([1, 65, 12, 32]) 2145
RoPE forward torch.Size([1, 65, 12, 32]) 2145
RoPE forward torch.Size([1, 65, 12, 32]) 2145
RoPE forward torch.Size([1, 65, 12, 32]) 2145
RoPE forward torch.Size([1, 65, 12, 32]) 2145
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 23.84it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2210
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 23.52it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
RoPE forward torch.Size([1, 65, 12, 32]) 2275
9


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2340
RoPE forward torch.Size([1, 65, 12, 32]) 2405
RoPE forward torch.Size([1, 65, 12, 32]) 2405
RoPE forward torch.Size([1, 65, 12, 32]) 2405
RoPE forward torch.Size([1, 65, 12, 32]) 2405
RoPE forward torch.Size([1, 65, 12, 32]) 2405
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 23.41it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 2470
RoPE forward torch.Size([1, 65, 12, 32]) 2470
RoPE forward torch.Size([1, 65, 12, 32]) 2470
RoPE forward torch.Size([1, 65, 12, 32]) 2470
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535


100%|██████████| 4/4 [00:00<00:00, 23.07it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
RoPE forward torch.Size([1, 65, 12, 32]) 2535
10


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2600
RoPE forward torch.Size([1, 65, 12, 32]) 2665
RoPE forward torch.Size([1, 65, 12, 32]) 2665
RoPE forward torch.Size([1, 65, 12, 32]) 2665
RoPE forward torch.Size([1, 65, 12, 32]) 2665
RoPE forward torch.Size([1, 65, 12, 32]) 2665
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 23.42it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2730
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795


100%|██████████| 4/4 [00:00<00:00, 23.11it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
RoPE forward torch.Size([1, 65, 12, 32]) 2795
11


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2860
RoPE forward torch.Size([1, 65, 12, 32]) 2925
RoPE forward torch.Size([1, 65, 12, 32]) 2925
RoPE forward torch.Size([1, 65, 12, 32]) 2925
RoPE forward torch.Size([1, 65, 12, 32]) 2925
RoPE forward torch.Size([1, 65, 12, 32]) 2925
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 23.21it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055
RoPE forward torch.Size([1, 65, 12, 32]) 3055


100%|██████████| 4/4 [00:00<00:00, 22.74it/s]


12


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3120
RoPE forward torch.Size([1, 65, 12, 32]) 3185
RoPE forward torch.Size([1, 65, 12, 32]) 3185
RoPE forward torch.Size([1, 65, 12, 32]) 3185
RoPE forward torch.Size([1, 65, 12, 32]) 3185
RoPE forward torch.Size([1, 65, 12, 32]) 3185
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 19.88it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3250
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 20.02it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3315
RoPE forward torch.Size([1, 65, 12, 32]) 3315





13


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3380
RoPE forward torch.Size([1, 65, 12, 32]) 3445
RoPE forward torch.Size([1, 65, 12, 32]) 3445
RoPE forward torch.Size([1, 65, 12, 32]) 3445
RoPE forward torch.Size([1, 65, 12, 32]) 3445
RoPE forward torch.Size([1, 65, 12, 32]) 3445
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 20.53it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3510
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12, 32]) 3575
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 20.40it/s]


14


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3640
RoPE forward torch.Size([1, 65, 12, 32]) 3705
RoPE forward torch.Size([1, 65, 12, 32]) 3705
RoPE forward torch.Size([1, 65, 12, 32]) 3705
RoPE forward torch.Size([1, 65, 12, 32]) 3705
RoPE forward torch.Size([1, 65, 12, 32]) 3705
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 24.82it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3770
RoPE forward torch.Size([1, 65, 12, 32]) 3770
RoPE forward torch.Size([1, 65, 12, 32]) 3770
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835
RoPE forward torch.Size([1, 65, 12, 32]) 3835


100%|██████████| 4/4 [00:00<00:00, 24.51it/s]


15


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3900
RoPE forward torch.Size([1, 65, 12, 32]) 3965
RoPE forward torch.Size([1, 65, 12, 32]) 3965
RoPE forward torch.Size([1, 65, 12, 32]) 3965
RoPE forward torch.Size([1, 65, 12, 32]) 3965
RoPE forward torch.Size([1, 65, 12, 32]) 3965
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 21.15it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 4030
RoPE forward torch.Size([1, 65, 12, 32]) 4030
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095


100%|██████████| 4/4 [00:00<00:00, 21.05it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
RoPE forward torch.Size([1, 65, 12, 32]) 4095
16


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4160
RoPE forward torch.Size([1, 65, 12, 32]) 4225
RoPE forward torch.Size([1, 65, 12, 32]) 4225
RoPE forward torch.Size([1, 65, 12, 32]) 4225
RoPE forward torch.Size([1, 65, 12, 32]) 4225
RoPE forward torch.Size([1, 65, 12, 32]) 4225
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 21.18it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 4290
RoPE forward torch.Size([1, 65, 12, 32]) 4290
RoPE forward torch.Size([1, 65, 12, 32]) 4290
RoPE forward torch.Size([1, 65, 12, 32]) 4290
RoPE forward torch.Size([1, 65, 12, 32]) 4290
RoPE forward torch.Size([1, 65, 12, 32]) 4290
RoPE forward torch.Size([1, 65, 12, 32]) 4290
RoPE forward torch.Size([1, 65, 12, 32]) 4290
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355


100%|██████████| 4/4 [00:00<00:00, 21.23it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
RoPE forward torch.Size([1, 65, 12, 32]) 4355
17


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4420
RoPE forward torch.Size([1, 65, 12, 32]) 4485
RoPE forward torch.Size([1, 65, 12, 32]) 4485
RoPE forward torch.Size([1, 65, 12, 32]) 4485
RoPE forward torch.Size([1, 65, 12, 32]) 4485
RoPE forward torch.Size([1, 65, 12, 32]) 4485
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 21.29it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 4550
RoPE forward torch.Size([1, 65, 12, 32]) 4550
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615
RoPE forward torch.Size([1, 65, 12, 32]) 4615


100%|██████████| 4/4 [00:00<00:00, 21.46it/s]


18


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4680
RoPE forward torch.Size([1, 65, 12, 32]) 4745
RoPE forward torch.Size([1, 65, 12, 32]) 4745
RoPE forward torch.Size([1, 65, 12, 32]) 4745
RoPE forward torch.Size([1, 65, 12, 32]) 4745
RoPE forward torch.Size([1, 65, 12, 32]) 4745
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 24.28it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4810
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 24.28it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
RoPE forward torch.Size([1, 65, 12, 32]) 4875
19


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 4940
RoPE forward torch.Size([1, 65, 12, 32]) 5005
RoPE forward torch.Size([1, 65, 12, 32]) 5005
RoPE forward torch.Size([1, 65, 12, 32]) 5005
RoPE forward torch.Size([1, 65, 12, 32]) 5005
RoPE forward torch.Size([1, 65, 12, 32]) 5005
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 20.54it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 5070
RoPE forward torch.Size([1, 65, 12, 32]) 5070
RoPE forward torch.Size([1, 65, 12, 32]) 5070
RoPE forward torch.Size([1, 65, 12, 32]) 5070
RoPE forward torch.Size([1, 65, 12, 32]) 5070
RoPE forward torch.Size([1, 65, 12, 32]) 5070
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12, 32]) 5135
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 21.05it/s]


20


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5200
RoPE forward torch.Size([1, 65, 12, 32]) 5265
RoPE forward torch.Size([1, 65, 12, 32]) 5265
RoPE forward torch.Size([1, 65, 12, 32]) 5265
RoPE forward torch.Size([1, 65, 12, 32]) 5265
RoPE forward torch.Size([1, 65, 12, 32]) 5265
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 24.59it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5330
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395


100%|██████████| 4/4 [00:00<00:00, 23.06it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
RoPE forward torch.Size([1, 65, 12, 32]) 5395
21


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5460
RoPE forward torch.Size([1, 65, 12, 32]) 5525
RoPE forward torch.Size([1, 65, 12, 32]) 5525
RoPE forward torch.Size([1, 65, 12, 32]) 5525
RoPE forward torch.Size([1, 65, 12, 32]) 5525
RoPE forward torch.Size([1, 65, 12, 32]) 5525
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 25.00it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 5590
RoPE forward torch.Size([1, 65, 12, 32]) 5590
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655


100%|██████████| 4/4 [00:00<00:00, 24.76it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
RoPE forward torch.Size([1, 65, 12, 32]) 5655
22


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5720
RoPE forward torch.Size([1, 65, 12, 32]) 5785
RoPE forward torch.Size([1, 65, 12, 32]) 5785
RoPE forward torch.Size([1, 65, 12, 32]) 5785
RoPE forward torch.Size([1, 65, 12, 32]) 5785
RoPE forward torch.Size([1, 65, 12, 32]) 5785
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 22.81it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 5850
RoPE forward torch.Size([1, 65, 12, 32]) 5850
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915


100%|██████████| 4/4 [00:00<00:00, 22.17it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
RoPE forward torch.Size([1, 65, 12, 32]) 5915
23


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 5980
RoPE forward torch.Size([1, 65, 12, 32]) 6045
RoPE forward torch.Size([1, 65, 12, 32]) 6045
RoPE forward torch.Size([1, 65, 12, 32]) 6045
RoPE forward torch.Size([1, 65, 12, 32]) 6045
RoPE forward torch.Size([1, 65, 12, 32]) 6045
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 23.25it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 6110
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175


100%|██████████| 4/4 [00:00<00:00, 23.42it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
RoPE forward torch.Size([1, 65, 12, 32]) 6175
24


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6240
RoPE forward torch.Size([1, 65, 12, 32]) 6305
RoPE forward torch.Size([1, 65, 12, 32]) 6305
RoPE forward torch.Size([1, 65, 12, 32]) 6305
RoPE forward torch.Size([1, 65, 12, 32]) 6305
RoPE forward torch.Size([1, 65, 12, 32]) 6305
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 24.13it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435


100%|██████████| 4/4 [00:00<00:00, 23.77it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
RoPE forward torch.Size([1, 65, 12, 32]) 6435
25


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6500
RoPE forward torch.Size([1, 65, 12, 32]) 6565
RoPE forward torch.Size([1, 65, 12, 32]) 6565
RoPE forward torch.Size([1, 65, 12, 32]) 6565
RoPE forward torch.Size([1, 65, 12, 32]) 6565
RoPE forward torch.Size([1, 65, 12, 32]) 6565
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 23.31it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6630
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 22.64it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 6695
RoPE forward torch.Size([1, 65, 12, 32]) 6695
26


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6760
RoPE forward torch.Size([1, 65, 12, 32]) 6825
RoPE forward torch.Size([1, 65, 12, 32]) 6825
RoPE forward torch.Size([1, 65, 12, 32]) 6825
RoPE forward torch.Size([1, 65, 12, 32]) 6825
RoPE forward torch.Size([1, 65, 12, 32]) 6825
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 24.48it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 6890
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955


100%|██████████| 4/4 [00:00<00:00, 23.23it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
RoPE forward torch.Size([1, 65, 12, 32]) 6955
27


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7020
RoPE forward torch.Size([1, 65, 12, 32]) 7085
RoPE forward torch.Size([1, 65, 12, 32]) 7085
RoPE forward torch.Size([1, 65, 12, 32]) 7085
RoPE forward torch.Size([1, 65, 12, 32]) 7085
RoPE forward torch.Size([1, 65, 12, 32]) 7085
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 21.99it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7150
RoPE forward torch.Size([1, 65, 12, 32]) 7150
RoPE forward torch.Size([1, 65, 12, 32]) 7150
RoPE forward torch.Size([1, 65, 12, 32]) 7150
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215


100%|██████████| 4/4 [00:00<00:00, 22.42it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
RoPE forward torch.Size([1, 65, 12, 32]) 7215
28


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7280
RoPE forward torch.Size([1, 65, 12, 32]) 7345
RoPE forward torch.Size([1, 65, 12, 32]) 7345
RoPE forward torch.Size([1, 65, 12, 32]) 7345
RoPE forward torch.Size([1, 65, 12, 32]) 7345
RoPE forward torch.Size([1, 65, 12, 32]) 7345
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 21.36it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7410
RoPE forward torch.Size([1, 65, 12, 32]) 7410
RoPE forward torch.Size([1, 65, 12, 32]) 7410
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475


100%|██████████| 4/4 [00:00<00:00, 20.89it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
RoPE forward torch.Size([1, 65, 12, 32]) 7475
29


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7540
RoPE forward torch.Size([1, 65, 12, 32]) 7605
RoPE forward torch.Size([1, 65, 12, 32]) 7605
RoPE forward torch.Size([1, 65, 12, 32]) 7605
RoPE forward torch.Size([1, 65, 12, 32]) 7605
RoPE forward torch.Size([1, 65, 12, 32]) 7605
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 20.80it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7670
RoPE forward torch.Size([1, 65, 12, 32]) 7670
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735


100%|██████████| 4/4 [00:00<00:00, 20.99it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
RoPE forward torch.Size([1, 65, 12, 32]) 7735
30


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7800
RoPE forward torch.Size([1, 65, 12, 32]) 7865
RoPE forward torch.Size([1, 65, 12, 32]) 7865
RoPE forward torch.Size([1, 65, 12, 32]) 7865
RoPE forward torch.Size([1, 65, 12, 32]) 7865
RoPE forward torch.Size([1, 65, 12, 32]) 7865
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 21.16it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995


100%|██████████| 4/4 [00:00<00:00, 21.18it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995
RoPE forward torch.Size([1, 65, 12, 32]) 7995





31


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8060
RoPE forward torch.Size([1, 65, 12, 32]) 8125
RoPE forward torch.Size([1, 65, 12, 32]) 8125
RoPE forward torch.Size([1, 65, 12, 32]) 8125
RoPE forward torch.Size([1, 65, 12, 32]) 8125
RoPE forward torch.Size([1, 65, 12, 32]) 8125
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 20.67it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 8190
RoPE forward torch.Size([1, 65, 12, 32]) 8190
RoPE forward torch.Size([1, 65, 12, 32]) 8190
RoPE forward torch.Size([1, 65, 12, 32]) 8190
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255


100%|██████████| 4/4 [00:00<00:00, 20.82it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 8255
RoPE forward torch.Size([1, 65, 12, 32]) 8255
32


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8320
RoPE forward torch.Size([1, 65, 12, 32]) 8385
RoPE forward torch.Size([1, 65, 12, 32]) 8385
RoPE forward torch.Size([1, 65, 12, 32]) 8385
RoPE forward torch.Size([1, 65, 12, 32]) 8385
RoPE forward torch.Size([1, 65, 12, 32]) 8385
RoPE forward torch.Size([1, 65, 12

 50%|█████     | 2/4 [00:00<00:00, 19.76it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8450
RoPE forward torch.Size([1, 65, 12, 32]) 8515
RoPE forward torch.Size([1, 65, 12, 32]) 8515
RoPE forward torch.Size([1, 65, 12, 32]) 8515
RoPE forward torch.Size([1, 65, 12, 32]) 8515
RoPE forward torch.Size([1, 65, 12, 32]) 8515
RoPE forward torch.Size([1, 65, 12

100%|██████████| 4/4 [00:00<00:00, 20.31it/s]


33


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8580
RoPE forward torch.Size([1, 65, 12, 32]) 8645
RoPE forward torch.Size([1, 65, 12, 32]) 8645
RoPE forward torch.Size([1, 65, 12, 32]) 8645
RoPE forward torch.Size([1, 65, 12, 32]) 8645
RoPE forward torch.Size([1, 65, 12, 32]) 8645
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 22.03it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 8710
RoPE forward torch.Size([1, 65, 12, 32]) 8710
RoPE forward torch.Size([1, 65, 12, 32]) 8710
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775
RoPE forward torch.Size([1, 65, 12, 32]) 8775


100%|██████████| 4/4 [00:00<00:00, 21.82it/s]


34


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8840
RoPE forward torch.Size([1, 65, 12, 32]) 8905
RoPE forward torch.Size([1, 65, 12, 32]) 8905
RoPE forward torch.Size([1, 65, 12, 32]) 8905
RoPE forward torch.Size([1, 65, 12, 32]) 8905
RoPE forward torch.Size([1, 65, 12, 32]) 8905
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 21.83it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 8970
RoPE forward torch.Size([1, 65, 12, 32]) 8970
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035


100%|██████████| 4/4 [00:00<00:00, 21.86it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
RoPE forward torch.Size([1, 65, 12, 32]) 9035
35


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9100
RoPE forward torch.Size([1, 65, 12, 32]) 9165
RoPE forward torch.Size([1, 65, 12, 32]) 9165
RoPE forward torch.Size([1, 65, 12, 32]) 9165
RoPE forward torch.Size([1, 65, 12, 32]) 9165
RoPE forward torch.Size([1, 65, 12, 32]) 9165
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 23.76it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 9230
RoPE forward torch.Size([1, 65, 12, 32]) 9230
RoPE forward torch.Size([1, 65, 12, 32]) 9230
RoPE forward torch.Size([1, 65, 12, 32]) 9230
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295


100%|██████████| 4/4 [00:00<00:00, 23.36it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
RoPE forward torch.Size([1, 65, 12, 32]) 9295
36


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9360
RoPE forward torch.Size([1, 65, 12, 32]) 9425
RoPE forward torch.Size([1, 65, 12, 32]) 9425
RoPE forward torch.Size([1, 65, 12, 32]) 9425
RoPE forward torch.Size([1, 65, 12, 32]) 9425
RoPE forward torch.Size([1, 65, 12, 32]) 9425
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 22.46it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 9490
RoPE forward torch.Size([1, 65, 12, 32]) 9490
RoPE forward torch.Size([1, 65, 12, 32]) 9490
RoPE forward torch.Size([1, 65, 12, 32]) 9490
RoPE forward torch.Size([1, 65, 12, 32]) 9490
RoPE forward torch.Size([1, 65, 12, 32]) 9490
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555


100%|██████████| 4/4 [00:00<00:00, 22.57it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 9555
RoPE forward torch.Size([1, 65, 12, 32]) 9555
37


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9620
RoPE forward torch.Size([1, 65, 12, 32]) 9685
RoPE forward torch.Size([1, 65, 12, 32]) 9685
RoPE forward torch.Size([1, 65, 12, 32]) 9685
RoPE forward torch.Size([1, 65, 12, 32]) 9685
RoPE forward torch.Size([1, 65, 12, 32]) 9685
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 24.32it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 9750
RoPE forward torch.Size([1, 65, 12, 32]) 9750
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815


100%|██████████| 4/4 [00:00<00:00, 24.39it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
RoPE forward torch.Size([1, 65, 12, 32]) 9815
38


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9880
RoPE forward torch.Size([1, 65, 12, 32]) 9945
RoPE forward torch.Size([1, 65, 12, 32]) 9945
RoPE forward torch.Size([1, 65, 12, 32]) 9945
RoPE forward torch.Size([1, 65, 12, 32]) 9945
RoPE forward torch.Size([1, 65, 12, 32]) 9945
RoPE forward torch.Size([1, 65, 12

 75%|███████▌  | 3/4 [00:00<00:00, 22.85it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075


100%|██████████| 4/4 [00:00<00:00, 22.73it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 10075
RoPE forward torch.Size([1, 65, 12, 32]) 10075
39


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10140
RoPE forward torch.Size([1, 65, 12, 32]) 10205
RoPE forward torch.Size([1, 65, 12, 32]) 10205
RoPE forward torch.Size([1, 65, 12, 32]) 10205
RoPE forward torch.Size([1, 65, 12, 32]) 10205
RoPE forward torch.Size([1, 65, 12, 32]) 10205
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 20.99it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335
RoPE forward torch.Size([1, 65, 12, 32]) 10335


100%|██████████| 4/4 [00:00<00:00, 21.23it/s]


40


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10400
RoPE forward torch.Size([1, 65, 12, 32]) 10465
RoPE forward torch.Size([1, 65, 12, 32]) 10465
RoPE forward torch.Size([1, 65, 12, 32]) 10465
RoPE forward torch.Size([1, 65, 12, 32]) 10465
RoPE forward torch.Size([1, 65, 12, 32]) 10465
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 23.01it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 10530
RoPE forward torch.Size([1, 65, 12, 32]) 10530
RoPE forward torch.Size([1, 65, 12, 32]) 10530
RoPE forward torch.Size([1, 65, 12, 32]) 10530
RoPE forward torch.Size([1, 65, 12, 32]) 10530
RoPE forward torch.Size([1, 65, 12, 32]) 10530
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward torch.Size([1, 65, 12, 32]) 10595
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 22.87it/s]


41


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10660
RoPE forward torch.Size([1, 65, 12, 32]) 10725
RoPE forward torch.Size([1, 65, 12, 32]) 10725
RoPE forward torch.Size([1, 65, 12, 32]) 10725
RoPE forward torch.Size([1, 65, 12, 32]) 10725
RoPE forward torch.Size([1, 65, 12, 32]) 10725
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 22.04it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 10790
RoPE forward torch.Size([1, 65, 12, 32]) 10790
RoPE forward torch.Size([1, 65, 12, 32]) 10790
RoPE forward torch.Size([1, 65, 12, 32]) 10790
RoPE forward torch.Size([1, 65, 12, 32]) 10790
RoPE forward torch.Size([1, 65, 12, 32]) 10790
RoPE forward torch.Size([1, 65, 12, 32]) 10790
RoPE forward torch.Size([1, 65, 12, 32]) 10790
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855


100%|██████████| 4/4 [00:00<00:00, 21.78it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
RoPE forward torch.Size([1, 65, 12, 32]) 10855
42


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10920
RoPE forward torch.Size([1, 65, 12, 32]) 10985
RoPE forward torch.Size([1, 65, 12, 32]) 10985
RoPE forward torch.Size([1, 65, 12, 32]) 10985
RoPE forward torch.Size([1, 65, 12, 32]) 10985
RoPE forward torch.Size([1, 65, 12, 32]) 10985
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 22.35it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115


100%|██████████| 4/4 [00:00<00:00, 21.77it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
RoPE forward torch.Size([1, 65, 12, 32]) 11115
43


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11180
RoPE forward torch.Size([1, 65, 12, 32]) 11245
RoPE forward torch.Size([1, 65, 12, 32]) 11245
RoPE forward torch.Size([1, 65, 12, 32]) 11245
RoPE forward torch.Size([1, 65, 12, 32]) 11245
RoPE forward torch.Size([1, 65, 12, 32]) 11245
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.74it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 11310
RoPE forward torch.Size([1, 65, 12, 32]) 11310
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375
RoPE forward torch.Size([1, 65, 12, 32]) 11375


100%|██████████| 4/4 [00:00<00:00, 22.27it/s]


44


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11440
RoPE forward torch.Size([1, 65, 12, 32]) 11505
RoPE forward torch.Size([1, 65, 12, 32]) 11505
RoPE forward torch.Size([1, 65, 12, 32]) 11505
RoPE forward torch.Size([1, 65, 12, 32]) 11505
RoPE forward torch.Size([1, 65, 12, 32]) 11505
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 23.36it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635
RoPE forward torch.Size([1, 65, 12, 32]) 11635


100%|██████████| 4/4 [00:00<00:00, 22.46it/s]


45


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11700
RoPE forward torch.Size([1, 65, 12, 32]) 11765
RoPE forward torch.Size([1, 65, 12, 32]) 11765
RoPE forward torch.Size([1, 65, 12, 32]) 11765
RoPE forward torch.Size([1, 65, 12, 32]) 11765
RoPE forward torch.Size([1, 65, 12, 32]) 11765
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 18.21it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 11765
RoPE forward torch.Size([1, 65, 12, 32]) 11765
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11830
RoPE forward torch.Size([1, 65, 12, 32]) 11895
RoPE forward torch.Size([1, 65, 12, 32]) 11895
RoPE forward torch.Size([1, 65, 12, 32]) 11895
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 18.65it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 11895
RoPE forward torch.Size([1, 65, 12, 32]) 11895
46


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 11960
RoPE forward torch.Size([1, 65, 12, 32]) 12025
RoPE forward torch.Size([1, 65, 12, 32]) 12025
RoPE forward torch.Size([1, 65, 12, 32]) 12025
RoPE forward torch.Size([1, 65, 12, 32]) 12025
RoPE forward torch.Size([1, 65, 12, 32]) 12025
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.43it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 12090
RoPE forward torch.Size([1, 65, 12, 32]) 12090
RoPE forward torch.Size([1, 65, 12, 32]) 12090
RoPE forward torch.Size([1, 65, 12, 32]) 12090
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155
RoPE forward torch.Size([1, 65, 12, 32]) 12155


100%|██████████| 4/4 [00:00<00:00, 21.18it/s]


47


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12220
RoPE forward torch.Size([1, 65, 12, 32]) 12285
RoPE forward torch.Size([1, 65, 12, 32]) 12285
RoPE forward torch.Size([1, 65, 12, 32]) 12285
RoPE forward torch.Size([1, 65, 12, 32]) 12285
RoPE forward torch.Size([1, 65, 12, 32]) 12285
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 20.80it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415


100%|██████████| 4/4 [00:00<00:00, 20.95it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
RoPE forward torch.Size([1, 65, 12, 32]) 12415
48


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12480
RoPE forward torch.Size([1, 65, 12, 32]) 12545
RoPE forward torch.Size([1, 65, 12, 32]) 12545
RoPE forward torch.Size([1, 65, 12, 32]) 12545
RoPE forward torch.Size([1, 65, 12, 32]) 12545
RoPE forward torch.Size([1, 65, 12, 32]) 12545
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 22.85it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675
RoPE forward torch.Size([1, 65, 12, 32]) 12675


100%|██████████| 4/4 [00:00<00:00, 22.84it/s]


49


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12740
RoPE forward torch.Size([1, 65, 12, 32]) 12805
RoPE forward torch.Size([1, 65, 12, 32]) 12805
RoPE forward torch.Size([1, 65, 12, 32]) 12805
RoPE forward torch.Size([1, 65, 12, 32]) 12805
RoPE forward torch.Size([1, 65, 12, 32]) 12805
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 20.73it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 12870
RoPE forward torch.Size([1, 65, 12, 32]) 12870
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935


100%|██████████| 4/4 [00:00<00:00, 20.74it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 12935
RoPE forward torch.Size([1, 65, 12, 32]) 12935
50


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13000
RoPE forward torch.Size([1, 65, 12, 32]) 13065
RoPE forward torch.Size([1, 65, 12, 32]) 13065
RoPE forward torch.Size([1, 65, 12, 32]) 13065
RoPE forward torch.Size([1, 65, 12, 32]) 13065
RoPE forward torch.Size([1, 65, 12, 32]) 13065
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.17it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 13130
RoPE forward torch.Size([1, 65, 12, 32]) 13130
RoPE forward torch.Size([1, 65, 12, 32]) 13130
RoPE forward torch.Size([1, 65, 12, 32]) 13130
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195


100%|██████████| 4/4 [00:00<00:00, 20.92it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
RoPE forward torch.Size([1, 65, 12, 32]) 13195
51


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13260
RoPE forward torch.Size([1, 65, 12, 32]) 13325
RoPE forward torch.Size([1, 65, 12, 32]) 13325
RoPE forward torch.Size([1, 65, 12, 32]) 13325
RoPE forward torch.Size([1, 65, 12, 32]) 13325
RoPE forward torch.Size([1, 65, 12, 32]) 13325
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.10it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455


100%|██████████| 4/4 [00:00<00:00, 20.92it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
RoPE forward torch.Size([1, 65, 12, 32]) 13455
52


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13520
RoPE forward torch.Size([1, 65, 12, 32]) 13585
RoPE forward torch.Size([1, 65, 12, 32]) 13585
RoPE forward torch.Size([1, 65, 12, 32]) 13585
RoPE forward torch.Size([1, 65, 12, 32]) 13585
RoPE forward torch.Size([1, 65, 12, 32]) 13585
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 20.85it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 13650
RoPE forward torch.Size([1, 65, 12, 32]) 13650
RoPE forward torch.Size([1, 65, 12, 32]) 13650
RoPE forward torch.Size([1, 65, 12, 32]) 13650
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715


100%|██████████| 4/4 [00:00<00:00, 21.38it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
RoPE forward torch.Size([1, 65, 12, 32]) 13715
53


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13780
RoPE forward torch.Size([1, 65, 12, 32]) 13845
RoPE forward torch.Size([1, 65, 12, 32]) 13845
RoPE forward torch.Size([1, 65, 12, 32]) 13845
RoPE forward torch.Size([1, 65, 12, 32]) 13845
RoPE forward torch.Size([1, 65, 12, 32]) 13845
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 22.79it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13910
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 22.25it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 13975
RoPE forward torch.Size([1, 65, 12, 32]) 13975
54


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14040
RoPE forward torch.Size([1, 65, 12, 32]) 14105
RoPE forward torch.Size([1, 65, 12, 32]) 14105
RoPE forward torch.Size([1, 65, 12, 32]) 14105
RoPE forward torch.Size([1, 65, 12, 32]) 14105
RoPE forward torch.Size([1, 65, 12, 32]) 14105
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.26it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 14170
RoPE forward torch.Size([1, 65, 12, 32]) 14170
RoPE forward torch.Size([1, 65, 12, 32]) 14170
RoPE forward torch.Size([1, 65, 12, 32]) 14170
RoPE forward torch.Size([1, 65, 12, 32]) 14170
RoPE forward torch.Size([1, 65, 12, 32]) 14170
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235


100%|██████████| 4/4 [00:00<00:00, 21.79it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
RoPE forward torch.Size([1, 65, 12, 32]) 14235
55


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14300
RoPE forward torch.Size([1, 65, 12, 32]) 14365
RoPE forward torch.Size([1, 65, 12, 32]) 14365
RoPE forward torch.Size([1, 65, 12, 32]) 14365
RoPE forward torch.Size([1, 65, 12, 32]) 14365
RoPE forward torch.Size([1, 65, 12, 32]) 14365
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.44it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 14430
RoPE forward torch.Size([1, 65, 12, 32]) 14430


100%|██████████| 4/4 [00:00<00:00, 22.02it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
RoPE forward torch.Size([1, 65, 12, 32]) 14495
56


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14560
RoPE forward torch.Size([1, 65, 12, 32]) 14625
RoPE forward torch.Size([1, 65, 12, 32]) 14625
RoPE forward torch.Size([1, 65, 12, 32]) 14625
RoPE forward torch.Size([1, 65, 12, 32]) 14625
RoPE forward torch.Size([1, 65, 12, 32]) 14625
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 22.73it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 14690
RoPE forward torch.Size([1, 65, 12, 32]) 14690
RoPE forward torch.Size([1, 65, 12, 32]) 14690
RoPE forward torch.Size([1, 65, 12, 32]) 14690
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755


100%|██████████| 4/4 [00:00<00:00, 22.87it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
RoPE forward torch.Size([1, 65, 12, 32]) 14755
57


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14820
RoPE forward torch.Size([1, 65, 12, 32]) 14885
RoPE forward torch.Size([1, 65, 12, 32]) 14885
RoPE forward torch.Size([1, 65, 12, 32]) 14885
RoPE forward torch.Size([1, 65, 12, 32]) 14885
RoPE forward torch.Size([1, 65, 12, 32]) 14885
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.48it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015
RoPE forward torch.Size([1, 65, 12, 32]) 15015


100%|██████████| 4/4 [00:00<00:00, 21.89it/s]


58


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15080
RoPE forward torch.Size([1, 65, 12, 32]) 15145
RoPE forward torch.Size([1, 65, 12, 32]) 15145
RoPE forward torch.Size([1, 65, 12, 32]) 15145
RoPE forward torch.Size([1, 65, 12, 32]) 15145
RoPE forward torch.Size([1, 65, 12, 32]) 15145
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 23.04it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275


100%|██████████| 4/4 [00:00<00:00, 22.31it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
RoPE forward torch.Size([1, 65, 12, 32]) 15275
59


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15340
RoPE forward torch.Size([1, 65, 12, 32]) 15405
RoPE forward torch.Size([1, 65, 12, 32]) 15405
RoPE forward torch.Size([1, 65, 12, 32]) 15405
RoPE forward torch.Size([1, 65, 12, 32]) 15405
RoPE forward torch.Size([1, 65, 12, 32]) 15405
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 20.22it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15470
RoPE forward torch.Size([1, 65, 12, 32]) 15470
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535
RoPE forward torch.Size([1, 65, 12, 32]) 15535


100%|██████████| 4/4 [00:00<00:00, 20.79it/s]


60


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15600
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 19.58it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15665
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward torch.Size([1, 65, 12, 32]) 15730
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 21.00it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 15795
RoPE forward torch.Size([1, 65, 12, 32]) 15795
61


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15860
RoPE forward torch.Size([1, 65, 12, 32]) 15925
RoPE forward torch.Size([1, 65, 12, 32]) 15925
RoPE forward torch.Size([1, 65, 12, 32]) 15925
RoPE forward torch.Size([1, 65, 12, 32]) 15925
RoPE forward torch.Size([1, 65, 12, 32]) 15925
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 19.52it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 15925
RoPE forward torch.Size([1, 65, 12, 32]) 15925
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 15990
RoPE forward torch.Size([1, 65, 12, 32]) 16055
RoPE forward torch.Size([1, 65, 12, 32]) 16055
RoPE forward torch.Size([1, 65, 12, 32]) 16055
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 19.92it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 16055
RoPE forward torch.Size([1, 65, 12, 32]) 16055
62


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16120
RoPE forward torch.Size([1, 65, 12, 32]) 16185
RoPE forward torch.Size([1, 65, 12, 32]) 16185
RoPE forward torch.Size([1, 65, 12, 32]) 16185
RoPE forward torch.Size([1, 65, 12, 32]) 16185
RoPE forward torch.Size([1, 65, 12, 32]) 16185
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 19.92it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16250
RoPE forward torch.Size([1, 65, 12, 32]) 16315
RoPE forward torch.Size([1, 65, 12, 32]) 16315
RoPE forward torch.Size([1, 65, 12, 32]) 16315
RoPE forward torch.Size([1, 65, 12, 32]) 16315
RoPE forward torch.Size([1, 65, 12, 32]) 16315
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 20.28it/s]


63


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16380
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 19.41it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16445
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward torch.Size([1, 65, 12, 32]) 16510
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 20.76it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 16575
RoPE forward torch.Size([1, 65, 12, 32]) 16575
64


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16640
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 18.40it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16705
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward torch.Size([1, 65, 12, 32]) 16770
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 20.24it/s]


65


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16900
RoPE forward torch.Size([1, 65, 12, 32]) 16965
RoPE forward torch.Size([1, 65, 12, 32]) 16965
RoPE forward torch.Size([1, 65, 12, 32]) 16965
RoPE forward torch.Size([1, 65, 12, 32]) 16965
RoPE forward torch.Size([1, 65, 12, 32]) 16965
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 20.18it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095


100%|██████████| 4/4 [00:00<00:00, 20.11it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
RoPE forward torch.Size([1, 65, 12, 32]) 17095
66


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17160
RoPE forward torch.Size([1, 65, 12, 32]) 17225
RoPE forward torch.Size([1, 65, 12, 32]) 17225
RoPE forward torch.Size([1, 65, 12, 32]) 17225
RoPE forward torch.Size([1, 65, 12, 32]) 17225
RoPE forward torch.Size([1, 65, 12, 32]) 17225
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 19.56it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17225
RoPE forward torch.Size([1, 65, 12, 32]) 17225
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17290
RoPE forward torch.Size([1, 65, 12, 32]) 17355
RoPE forward torch.Size([1, 65, 12, 32]) 17355
RoPE forward torch.Size([1, 65, 12, 32]) 17355
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 18.74it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 17355
RoPE forward torch.Size([1, 65, 12, 32]) 17355
67


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17420
RoPE forward torch.Size([1, 65, 12, 32]) 17485
RoPE forward torch.Size([1, 65, 12, 32]) 17485
RoPE forward torch.Size([1, 65, 12, 32]) 17485
RoPE forward torch.Size([1, 65, 12, 32]) 17485
RoPE forward torch.Size([1, 65, 12, 32]) 17485
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.46it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17550
RoPE forward torch.Size([1, 65, 12, 32]) 17550
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615


100%|██████████| 4/4 [00:00<00:00, 21.94it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615
RoPE forward torch.Size([1, 65, 12, 32]) 17615





68


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17680
RoPE forward torch.Size([1, 65, 12, 32]) 17745
RoPE forward torch.Size([1, 65, 12, 32]) 17745
RoPE forward torch.Size([1, 65, 12, 32]) 17745
RoPE forward torch.Size([1, 65, 12, 32]) 17745
RoPE forward torch.Size([1, 65, 12, 32]) 17745
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 19.45it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17810
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 21.02it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
RoPE forward torch.Size([1, 65, 12, 32]) 17875
69


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 17940
RoPE forward torch.Size([1, 65, 12, 32]) 18005
RoPE forward torch.Size([1, 65, 12, 32]) 18005
RoPE forward torch.Size([1, 65, 12, 32]) 18005
RoPE forward torch.Size([1, 65, 12, 32]) 18005
RoPE forward torch.Size([1, 65, 12, 32]) 18005
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.50it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 18070
RoPE forward torch.Size([1, 65, 12, 32]) 18070
RoPE forward torch.Size([1, 65, 12, 32]) 18070
RoPE forward torch.Size([1, 65, 12, 32]) 18070
RoPE forward torch.Size([1, 65, 12, 32]) 18070
RoPE forward torch.Size([1, 65, 12, 32]) 18070
RoPE forward torch.Size([1, 65, 12, 32]) 18070
RoPE forward torch.Size([1, 65, 12, 32]) 18070
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward torch.Size([1, 65, 12, 32]) 18135
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 21.20it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 18135
70


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18200
RoPE forward torch.Size([1, 65, 12, 32]) 18265
RoPE forward torch.Size([1, 65, 12, 32]) 18265
RoPE forward torch.Size([1, 65, 12, 32]) 18265
RoPE forward torch.Size([1, 65, 12, 32]) 18265
RoPE forward torch.Size([1, 65, 12, 32]) 18265
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.53it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395


100%|██████████| 4/4 [00:00<00:00, 20.74it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
RoPE forward torch.Size([1, 65, 12, 32]) 18395
71


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18460
RoPE forward torch.Size([1, 65, 12, 32]) 18525
RoPE forward torch.Size([1, 65, 12, 32]) 18525
RoPE forward torch.Size([1, 65, 12, 32]) 18525
RoPE forward torch.Size([1, 65, 12, 32]) 18525
RoPE forward torch.Size([1, 65, 12, 32]) 18525
RoPE forward 

 75%|███████▌  | 3/4 [00:00<00:00, 21.91it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 18590
RoPE forward torch.Size([1, 65, 12, 32]) 18590
RoPE forward torch.Size([1, 65, 12, 32]) 18590
RoPE forward torch.Size([1, 65, 12, 32]) 18590
RoPE forward torch.Size([1, 65, 12, 32]) 18590
RoPE forward torch.Size([1, 65, 12, 32]) 18590
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655


100%|██████████| 4/4 [00:00<00:00, 22.24it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 18655
RoPE forward torch.Size([1, 65, 12, 32]) 18655
72


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18720
RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 18.16it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward torch.Size([1, 65, 12, 32]) 18785
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18850
RoPE forward torch.Size([1, 65, 12, 32]) 18915
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 18.56it/s]


73


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 18980
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 19.71it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19045
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward torch.Size([1, 65, 12, 32]) 19110
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 20.16it/s]


74


  0%|          | 0/4 [00:00<?, ?it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19240
RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward 

 50%|█████     | 2/4 [00:00<00:00, 19.70it/s]

RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward torch.Size([1, 65, 12, 32]) 19305
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19370
RoPE forward torch.Size([1, 65, 12, 32]) 19435
RoPE forward 

100%|██████████| 4/4 [00:00<00:00, 20.72it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 19435
RoPE forward torch.Size([1, 65, 12, 32]) 19435
75


  0%|          | 0/4 [00:00<?, ?it/s]


RoPE forward torch.Size([1, 65, 12, 32]) 19500


RuntimeError: The size of tensor a (0) must match the size of tensor b (65) at non-singleton dimension 1

In [None]:
actions = t.tensor([6*[2]], dtype=t.int32, device="mps")
frames = render_video(actions, z[:, 0])
frames = pred2frame(frames)

import matplotlib.pyplot as plt
import matplotlib.animation as animation

# z[0] shape: (num_frames, height, width, 3), where num_frames = frames.shape[1]
frames_np = frames[0].permute(0, 2, 3, 1).cpu().numpy()  # shape: (num_frames, height, width, 3)

fig, ax = plt.subplots()
im = ax.imshow(frames_np[0])
ax.set_title('Frame 0')

def animate(i):
    im.set_data(frames_np[i])
    ax.set_title(f'Frame {i}')
    return [im]

ani = animation.FuncAnimation(
    fig, animate, frames=frames_np.shape[0],
    interval=200, blit=True, repeat=True
)

plt.show()

# Optional: To save the gif uncomment below (requires pillow, install with `pip install pillow`)
ani.save("sample_cont.gif", writer="pillow")