# Explain what PyTorch 2.0 does to your code

PyTorch 2.0 looks like magic to many researchers, because it dynamically translates Python bytecode for you. Many people don't know Python bytecode, so they don't know what the translated code looks like. Fortunately, with `depyf`, we can clearly illustrate it for you.

# Run some code with `torch.compile`, using the `eager` backend

Note that we use the `eager` backend so that the compiled subgraph runs in eager mode, which makes it easy to get its code.

In [2]:
import torch
@torch.compile(backend="eager")  # "eager" backend: the captured subgraphs run in eager mode
def toy_example(a, b):
    x = a / (torch.abs(a) + 1)
    if b.sum() < 0:
        b = b * -1
    return x * b

for _ in range(100):
    # Fresh random inputs on every call, so the sign of `b.sum()` varies and
    # both sides of the `if` in `toy_example` get exercised.
    toy_example(torch.randn(10), torch.randn(10))

# Interactively explore everything you are curious about

In [9]:
from depyf.explain import interactive_explain
interactive_explain(toy_example)  # shows the transformed code plus the source of each function it references

# transformed source code:

<IPython.core.display.JSON object>

# source code of referenced function:

<details>
  <summary>compiled_code_6</summary>

  ```python
  def compiled_code_6(a, b):
      __temp_1689 = __compiled_fn_0(a, b)
      x = __temp_1689[0]
      if __temp_1689[1]:
          return __resume_at_30_1(b, x)
      return __resume_at_38_2(b, x)

  ```
</details>
<details>
  <summary>toy_example</summary>

  ```python
  def toy_example(a, b):
      x = a / (torch.abs(a) + 1)
      if b.sum() < 0:
          b = b * -1
      return x * b

  ```
</details>
<details>
  <summary>__resume_at_38_2</summary>

  ```python
  def __resume_at_38_2(b, x):
      return x * b

  ```
</details>
<details>
  <summary>__compiled_fn_4</summary>

  ```python
  def __compiled_fn_4(self, L_x_, L_b_):
      l_x_ = L_x_
      l_b_ = L_b_
      mul = l_x_ * l_b_
      l_x_ = None
      l_b_ = None
      return mul,

  ```
</details>
<details>
  <summary>compiled_code_7</summary>

  ```python
  def compiled_code_7(b, x):
      return __compiled_fn_3(b, x)[0]

  ```
</details>
<details>
  <summary>__compiled_fn_3</summary>

  ```python
  def __compiled_fn_3(self, L_b_, L_x_):
      l_b_ = L_b_
      l_x_ = L_x_
      b = l_b_ * -1
      l_b_ = None
      mul_1 = l_x_ * b
      l_x_ = None
      b = None
      return mul_1,

  ```
</details>
<details>
  <summary>__resume_at_30_1</summary>

  ```python
  def __resume_at_30_1(b, x):
      b = b * -1
      return x * b

  ```
</details>
<details>
  <summary>__compiled_fn_0</summary>

  ```python
  def __compiled_fn_0(self, L_a_, L_b_):
      l_a_ = L_a_
      l_b_ = L_b_
      abs_1 = torch.abs(l_a_)
      add = abs_1 + 1
      abs_1 = None
      x = l_a_ / add
      l_a_ = None
      add = None
      sum_1 = l_b_.sum()
      l_b_ = None
      lt = sum_1 < 0
      sum_1 = None
      return x, lt

  ```
</details>
<details>
  <summary>compiled_code_8</summary>

  ```python
  def compiled_code_8(b, x):
      return __compiled_fn_4(x, b)[0]

  ```
</details>


In [10]:
import torchvision
model = torchvision.models.swin_t(
    weights=torchvision.models.Swin_T_Weights.IMAGENET1K_V1,  # explicit form of the deprecated `pretrained=True`
)
model_compile = torch.compile(model, backend='eager')  # same "eager" backend as the toy example
for _ in range(100):
    # Call the compiled model repeatedly on a random 1x3x224x224 input.
    model_compile(torch.randn(1,3,224,224))



In [11]:
interactive_explain(model_compile)  # same exploration as above, now for the compiled Swin-T model

# transformed source code:

<IPython.core.display.JSON object>

# source code of referenced function:

<details>
  <summary>__compiled_fn_6</summary>

  ```python
  def __compiled_fn_6(self, L_x_):
      l_x_ = L_x_
      l__self___features_0_0 = self.L__self___features_0_0(l_x_)
      l_x_ = None
      __temp_2498 = []
      __temp_2498.extend((0, 2, 3, 1))
      permute = torch.permute(l__self___features_0_0, __temp_2498)
      l__self___features_0_0 = None
      l__self___features_0_2 = self.L__self___features_0_2(permute)
      permute = None
      getattr_getattr_l__self___features___1_____0___norm1 = (self.
          getattr_getattr_L__self___features___1_____0___norm1(
          l__self___features_0_2))
      (
          getattr_getattr_l__self___features___1_____0___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___1_____0___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___1_____0___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___1_____0___attn_relative_position_index
          )
      relative_position_bias = (
          getattr_getattr_l__self___features___1_____0___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___1_____0___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___1_____0___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___1_____0___attn_relative_position_index
          ) = None
      relative_position_bias_1 = relative_position_bias.view(49, 49, -1)
      relative_position_bias = None
      permute_1 = relative_position_bias_1.permute(2, 0, 1)
      relative_position_bias_1 = None
      contiguous = permute_1.contiguous()
      permute_1 = None
      relative_position_bias_3 = contiguous.unsqueeze(0)
      contiguous = None
      getattr_getattr_l__self___features___1_____0___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___1_____0___attn_qkv_weight)
      getattr_getattr_l__self___features___1_____0___attn_proj_weight = (self.
          getattr_getattr_L__self___features___1_____0___attn_proj_weight)
      getattr_getattr_l__self___features___1_____0___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___1_____0___attn_qkv_bias)
      getattr_getattr_l__self___features___1_____0___attn_proj_bias = (self.
          getattr_getattr_L__self___features___1_____0___attn_proj_bias)
      x = torch.nn.functional.pad(
          getattr_getattr_l__self___features___1_____0___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___1_____0___norm1 = None
      x_1 = x.view(1, 8, 7, 8, 7, 96)
      x = None
      permute_2 = x_1.permute(0, 1, 3, 2, 4, 5)
      x_1 = None
      x_2 = permute_2.reshape(64, 49, 96)
      permute_2 = None
      qkv = torch._C._nn.linear(x_2,
          getattr_getattr_l__self___features___1_____0___attn_qkv_weight,
          getattr_getattr_l__self___features___1_____0___attn_qkv_bias)
      x_2 = None
      getattr_getattr_l__self___features___1_____0___attn_qkv_weight = None
      getattr_getattr_l__self___features___1_____0___attn_qkv_bias = None
      reshape_1 = qkv.reshape(64, 49, 3, 3, 32)
      qkv = None
      qkv_1 = reshape_1.permute(2, 0, 3, 1, 4)
      reshape_1 = None
      q = qkv_1[0]
      k = qkv_1[1]
      v = qkv_1[2]
      qkv_1 = None
      q_1 = q * 0.1767766952966369
      q = None
      transpose = k.transpose(-2, -1)
      k = None
      attn = q_1.matmul(transpose)
      q_1 = None
      transpose = None
      attn_1 = attn + relative_position_bias_3
      attn = None
      relative_position_bias_3 = None
      attn_2 = torch.nn.functional.softmax(attn_1, dim=-1)
      attn_1 = None
      attn_3 = torch.nn.functional.dropout(attn_2, p=0.0, training=True)
      attn_2 = None
      matmul_1 = attn_3.matmul(v)
      attn_3 = None
      v = None
      transpose_1 = matmul_1.transpose(1, 2)
      matmul_1 = None
      x_3 = transpose_1.reshape(64, 49, 96)
      transpose_1 = None
      x_4 = torch._C._nn.linear(x_3,
          getattr_getattr_l__self___features___1_____0___attn_proj_weight,
          getattr_getattr_l__self___features___1_____0___attn_proj_bias)
      x_3 = None
      getattr_getattr_l__self___features___1_____0___attn_proj_weight = None
      getattr_getattr_l__self___features___1_____0___attn_proj_bias = None
      x_5 = torch.nn.functional.dropout(x_4, p=0.0, training=True)
      x_4 = None
      x_6 = x_5.view(1, 8, 8, 7, 7, 96)
      x_5 = None
      permute_4 = x_6.permute(0, 1, 3, 2, 4, 5)
      x_6 = None
      x_7 = permute_4.reshape(1, 56, 56, 96)
      permute_4 = None
      getitem_4 = x_7[slice(None, None, None), slice(None, 56, None), slice(None,
          56, None), slice(None, None, None)]
      x_7 = None
      x_8 = getitem_4.contiguous()
      getitem_4 = None
      _log_api_usage_once = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      x_9 = l__self___features_0_2 + x_8
      l__self___features_0_2 = None
      x_8 = None
      getattr_getattr_l__self___features___1_____0___norm2 = (self.
          getattr_getattr_L__self___features___1_____0___norm2(x_9))
      getattr_getattr_l__self___features___1_____0___mlp_0 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_0(
          getattr_getattr_l__self___features___1_____0___norm2))
      getattr_getattr_l__self___features___1_____0___norm2 = None
      getattr_getattr_l__self___features___1_____0___mlp_1 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_1(
          getattr_getattr_l__self___features___1_____0___mlp_0))
      getattr_getattr_l__self___features___1_____0___mlp_0 = None
      getattr_getattr_l__self___features___1_____0___mlp_2 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_2(
          getattr_getattr_l__self___features___1_____0___mlp_1))
      getattr_getattr_l__self___features___1_____0___mlp_1 = None
      getattr_getattr_l__self___features___1_____0___mlp_3 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_3(
          getattr_getattr_l__self___features___1_____0___mlp_2))
      getattr_getattr_l__self___features___1_____0___mlp_2 = None
      getattr_getattr_l__self___features___1_____0___mlp_4 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_4(
          getattr_getattr_l__self___features___1_____0___mlp_3))
      getattr_getattr_l__self___features___1_____0___mlp_3 = None
      _log_api_usage_once_1 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      x_10 = x_9 + getattr_getattr_l__self___features___1_____0___mlp_4
      x_9 = None
      getattr_getattr_l__self___features___1_____0___mlp_4 = None
      getattr_getattr_l__self___features___1_____1___norm1 = (self.
          getattr_getattr_L__self___features___1_____1___norm1(x_10))
      (
          getattr_getattr_l__self___features___1_____1___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___1_____1___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___1_____1___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___1_____1___attn_relative_position_index
          )
      relative_position_bias_4 = (
          getattr_getattr_l__self___features___1_____1___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___1_____1___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___1_____1___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___1_____1___attn_relative_position_index
          ) = None
      relative_position_bias_5 = relative_position_bias_4.view(49, 49, -1)
      relative_position_bias_4 = None
      permute_5 = relative_position_bias_5.permute(2, 0, 1)
      relative_position_bias_5 = None
      contiguous_2 = permute_5.contiguous()
      permute_5 = None
      relative_position_bias_7 = contiguous_2.unsqueeze(0)
      contiguous_2 = None
      getattr_getattr_l__self___features___1_____1___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___1_____1___attn_qkv_weight)
      getattr_getattr_l__self___features___1_____1___attn_proj_weight = (self.
          getattr_getattr_L__self___features___1_____1___attn_proj_weight)
      getattr_getattr_l__self___features___1_____1___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___1_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___1_____1___attn_proj_bias = (self.
          getattr_getattr_L__self___features___1_____1___attn_proj_bias)
      x_11 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___1_____1___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___1_____1___norm1 = None
      x_12 = torch.roll(x_11, shifts=(-3, -3), dims=(1, 2))
      x_11 = None
      x_13 = x_12.view(1, 8, 7, 8, 7, 96)
      x_12 = None
      permute_6 = x_13.permute(0, 1, 3, 2, 4, 5)
      x_13 = None
      x_14 = permute_6.reshape(64, 49, 96)
      permute_6 = None
      qkv_2 = torch._C._nn.linear(x_14,
          getattr_getattr_l__self___features___1_____1___attn_qkv_weight,
          getattr_getattr_l__self___features___1_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___1_____1___attn_qkv_weight = None
      getattr_getattr_l__self___features___1_____1___attn_qkv_bias = None
      reshape_5 = qkv_2.reshape(64, 49, 3, 3, 32)
      qkv_2 = None
      qkv_3 = reshape_5.permute(2, 0, 3, 1, 4)
      reshape_5 = None
      q_2 = qkv_3[0]
      k_1 = qkv_3[1]
      v_1 = qkv_3[2]
      qkv_3 = None
      q_3 = q_2 * 0.1767766952966369
      q_2 = None
      transpose_2 = k_1.transpose(-2, -1)
      k_1 = None
      attn_4 = q_3.matmul(transpose_2)
      q_3 = None
      transpose_2 = None
      attn_5 = attn_4 + relative_position_bias_7
      attn_4 = None
      relative_position_bias_7 = None
      attn_mask = x_14.new_zeros((56, 56))
      x_14 = None
      attn_mask[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem = attn_mask
      attn_mask[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_1 = attn_mask
      attn_mask[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_2 = attn_mask
      attn_mask[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_3 = attn_mask
      attn_mask[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_4 = attn_mask
      attn_mask[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_5 = attn_mask
      attn_mask[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_6 = attn_mask
      attn_mask[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_7 = attn_mask
      attn_mask[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_8 = attn_mask
      attn_mask_1 = attn_mask.view(8, 7, 8, 7)
      attn_mask = None
      permute_8 = attn_mask_1.permute(0, 2, 1, 3)
      attn_mask_1 = None
      attn_mask_2 = permute_8.reshape(64, 49)
      permute_8 = None
      unsqueeze_2 = attn_mask_2.unsqueeze(1)
      unsqueeze_3 = attn_mask_2.unsqueeze(2)
      attn_mask_2 = None
      attn_mask_3 = unsqueeze_2 - unsqueeze_3
      unsqueeze_2 = None
      unsqueeze_3 = None
      ne = attn_mask_3 != 0
      masked_fill = attn_mask_3.masked_fill(ne, -100.0)
      ne = None
      eq = attn_mask_3 == 0
      attn_mask_3 = None
      attn_mask_4 = masked_fill.masked_fill(eq, 0.0)
      masked_fill = None
      eq = None
      attn_6 = attn_5.view(1, 64, 3, 49, 49)
      attn_5 = None
      unsqueeze_4 = attn_mask_4.unsqueeze(1)
      attn_mask_4 = None
      unsqueeze_5 = unsqueeze_4.unsqueeze(0)
      unsqueeze_4 = None
      attn_7 = attn_6 + unsqueeze_5
      attn_6 = None
      unsqueeze_5 = None
      attn_8 = attn_7.view(-1, 3, 49, 49)
      attn_7 = None
      attn_9 = torch.nn.functional.softmax(attn_8, dim=-1)
      attn_8 = None
      attn_10 = torch.nn.functional.dropout(attn_9, p=0.0, training=True)
      attn_9 = None
      matmul_3 = attn_10.matmul(v_1)
      attn_10 = None
      v_1 = None
      transpose_3 = matmul_3.transpose(1, 2)
      matmul_3 = None
      x_15 = transpose_3.reshape(64, 49, 96)
      transpose_3 = None
      x_16 = torch._C._nn.linear(x_15,
          getattr_getattr_l__self___features___1_____1___attn_proj_weight,
          getattr_getattr_l__self___features___1_____1___attn_proj_bias)
      x_15 = None
      getattr_getattr_l__self___features___1_____1___attn_proj_weight = None
      getattr_getattr_l__self___features___1_____1___attn_proj_bias = None
      x_17 = torch.nn.functional.dropout(x_16, p=0.0, training=True)
      x_16 = None
      x_18 = x_17.view(1, 8, 8, 7, 7, 96)
      x_17 = None
      permute_9 = x_18.permute(0, 1, 3, 2, 4, 5)
      x_18 = None
      x_19 = permute_9.reshape(1, 56, 56, 96)
      permute_9 = None
      x_20 = torch.roll(x_19, shifts=(3, 3), dims=(1, 2))
      x_19 = None
      getitem_9 = x_20[slice(None, None, None), slice(None, 56, None), slice(None,
          56, None), slice(None, None, None)]
      x_20 = None
      x_21 = getitem_9.contiguous()
      getitem_9 = None
      _log_api_usage_once_2 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2601 = []
      __temp_2601.extend((1, 1, 1, 1))
      noise = torch.empty(__temp_2601, dtype=torch.float32, device=device(type='cpu')
          )
      noise_1 = noise.bernoulli_(0.9818181818181818)
      noise = None
      div_ = noise_1.div_(0.9818181818181818)
      mul_2 = x_21 * noise_1
      x_21 = None
      noise_1 = None
      x_22 = x_10 + mul_2
      x_10 = None
      mul_2 = None
      getattr_getattr_l__self___features___1_____1___norm2 = (self.
          getattr_getattr_L__self___features___1_____1___norm2(x_22))
      getattr_getattr_l__self___features___1_____1___mlp_0 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_0(
          getattr_getattr_l__self___features___1_____1___norm2))
      getattr_getattr_l__self___features___1_____1___norm2 = None
      getattr_getattr_l__self___features___1_____1___mlp_1 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_1(
          getattr_getattr_l__self___features___1_____1___mlp_0))
      getattr_getattr_l__self___features___1_____1___mlp_0 = None
      getattr_getattr_l__self___features___1_____1___mlp_2 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_2(
          getattr_getattr_l__self___features___1_____1___mlp_1))
      getattr_getattr_l__self___features___1_____1___mlp_1 = None
      getattr_getattr_l__self___features___1_____1___mlp_3 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_3(
          getattr_getattr_l__self___features___1_____1___mlp_2))
      getattr_getattr_l__self___features___1_____1___mlp_2 = None
      getattr_getattr_l__self___features___1_____1___mlp_4 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_4(
          getattr_getattr_l__self___features___1_____1___mlp_3))
      getattr_getattr_l__self___features___1_____1___mlp_3 = None
      _log_api_usage_once_3 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2614 = []
      __temp_2614.extend((1, 1, 1, 1))
      noise_2 = torch.empty(__temp_2614, dtype=torch.float32, device=device(type=
          'cpu'))
      noise_3 = noise_2.bernoulli_(0.9818181818181818)
      noise_2 = None
      div__1 = noise_3.div_(0.9818181818181818)
      mul_3 = getattr_getattr_l__self___features___1_____1___mlp_4 * noise_3
      getattr_getattr_l__self___features___1_____1___mlp_4 = None
      noise_3 = None
      x_23 = x_22 + mul_3
      x_22 = None
      mul_3 = None
      x_24 = torch.nn.functional.pad(x_23, (0, 0, 0, 0, 0, 0))
      x_23 = None
      x0 = x_24[Ellipsis, slice(0, None, 2), slice(0, None, 2), slice(None, None,
          None)]
      x1 = x_24[Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None, None,
          None)]
      x2 = x_24[Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None, None,
          None)]
      x3 = x_24[Ellipsis, slice(1, None, 2), slice(1, None, 2), slice(None, None,
          None)]
      x_24 = None
      x_26 = torch.cat([x0, x1, x2, x3], -1)
      x0 = None
      x1 = None
      x2 = None
      x3 = None
      x_27 = self.getattr_L__self___features___2___norm(x_26)
      x_26 = None
      x_28 = self.getattr_L__self___features___2___reduction(x_27)
      x_27 = None
      getattr_getattr_l__self___features___3_____0___norm1 = (self.
          getattr_getattr_L__self___features___3_____0___norm1(x_28))
      (
          getattr_getattr_l__self___features___3_____0___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___3_____0___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___3_____0___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___3_____0___attn_relative_position_index
          )
      relative_position_bias_8 = (
          getattr_getattr_l__self___features___3_____0___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___3_____0___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___3_____0___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___3_____0___attn_relative_position_index
          ) = None
      relative_position_bias_9 = relative_position_bias_8.view(49, 49, -1)
      relative_position_bias_8 = None
      permute_10 = relative_position_bias_9.permute(2, 0, 1)
      relative_position_bias_9 = None
      contiguous_4 = permute_10.contiguous()
      permute_10 = None
      relative_position_bias_11 = contiguous_4.unsqueeze(0)
      contiguous_4 = None
      getattr_getattr_l__self___features___3_____0___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___3_____0___attn_qkv_weight)
      getattr_getattr_l__self___features___3_____0___attn_proj_weight = (self.
          getattr_getattr_L__self___features___3_____0___attn_proj_weight)
      getattr_getattr_l__self___features___3_____0___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___3_____0___attn_qkv_bias)
      getattr_getattr_l__self___features___3_____0___attn_proj_bias = (self.
          getattr_getattr_L__self___features___3_____0___attn_proj_bias)
      x_29 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___3_____0___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___3_____0___norm1 = None
      x_30 = x_29.view(1, 4, 7, 4, 7, 192)
      x_29 = None
      permute_11 = x_30.permute(0, 1, 3, 2, 4, 5)
      x_30 = None
      x_31 = permute_11.reshape(16, 49, 192)
      permute_11 = None
      qkv_4 = torch._C._nn.linear(x_31,
          getattr_getattr_l__self___features___3_____0___attn_qkv_weight,
          getattr_getattr_l__self___features___3_____0___attn_qkv_bias)
      x_31 = None
      getattr_getattr_l__self___features___3_____0___attn_qkv_weight = None
      getattr_getattr_l__self___features___3_____0___attn_qkv_bias = None
      reshape_10 = qkv_4.reshape(16, 49, 3, 6, 32)
      qkv_4 = None
      qkv_5 = reshape_10.permute(2, 0, 3, 1, 4)
      reshape_10 = None
      q_4 = qkv_5[0]
      k_2 = qkv_5[1]
      v_2 = qkv_5[2]
      qkv_5 = None
      q_5 = q_4 * 0.1767766952966369
      q_4 = None
      transpose_4 = k_2.transpose(-2, -1)
      k_2 = None
      attn_11 = q_5.matmul(transpose_4)
      q_5 = None
      transpose_4 = None
      attn_12 = attn_11 + relative_position_bias_11
      attn_11 = None
      relative_position_bias_11 = None
      attn_13 = torch.nn.functional.softmax(attn_12, dim=-1)
      attn_12 = None
      attn_14 = torch.nn.functional.dropout(attn_13, p=0.0, training=True)
      attn_13 = None
      matmul_5 = attn_14.matmul(v_2)
      attn_14 = None
      v_2 = None
      transpose_5 = matmul_5.transpose(1, 2)
      matmul_5 = None
      x_32 = transpose_5.reshape(16, 49, 192)
      transpose_5 = None
      x_33 = torch._C._nn.linear(x_32,
          getattr_getattr_l__self___features___3_____0___attn_proj_weight,
          getattr_getattr_l__self___features___3_____0___attn_proj_bias)
      x_32 = None
      getattr_getattr_l__self___features___3_____0___attn_proj_weight = None
      getattr_getattr_l__self___features___3_____0___attn_proj_bias = None
      x_34 = torch.nn.functional.dropout(x_33, p=0.0, training=True)
      x_33 = None
      x_35 = x_34.view(1, 4, 4, 7, 7, 192)
      x_34 = None
      permute_13 = x_35.permute(0, 1, 3, 2, 4, 5)
      x_35 = None
      x_36 = permute_13.reshape(1, 28, 28, 192)
      permute_13 = None
      getitem_18 = x_36[slice(None, None, None), slice(None, 28, None), slice(
          None, 28, None), slice(None, None, None)]
      x_36 = None
      x_37 = getitem_18.contiguous()
      getitem_18 = None
      _log_api_usage_once_4 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2667 = []
      __temp_2667.extend((1, 1, 1, 1))
      noise_4 = torch.empty(__temp_2667, dtype=torch.float32, device=device(type=
          'cpu'))
      noise_5 = noise_4.bernoulli_(0.9636363636363636)
      noise_4 = None
      div__2 = noise_5.div_(0.9636363636363636)
      mul_5 = x_37 * noise_5
      x_37 = None
      noise_5 = None
      x_38 = x_28 + mul_5
      x_28 = None
      mul_5 = None
      getattr_getattr_l__self___features___3_____0___norm2 = (self.
          getattr_getattr_L__self___features___3_____0___norm2(x_38))
      getattr_getattr_l__self___features___3_____0___mlp_0 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_0(
          getattr_getattr_l__self___features___3_____0___norm2))
      getattr_getattr_l__self___features___3_____0___norm2 = None
      getattr_getattr_l__self___features___3_____0___mlp_1 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_1(
          getattr_getattr_l__self___features___3_____0___mlp_0))
      getattr_getattr_l__self___features___3_____0___mlp_0 = None
      getattr_getattr_l__self___features___3_____0___mlp_2 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_2(
          getattr_getattr_l__self___features___3_____0___mlp_1))
      getattr_getattr_l__self___features___3_____0___mlp_1 = None
      getattr_getattr_l__self___features___3_____0___mlp_3 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_3(
          getattr_getattr_l__self___features___3_____0___mlp_2))
      getattr_getattr_l__self___features___3_____0___mlp_2 = None
      getattr_getattr_l__self___features___3_____0___mlp_4 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_4(
          getattr_getattr_l__self___features___3_____0___mlp_3))
      getattr_getattr_l__self___features___3_____0___mlp_3 = None
      _log_api_usage_once_5 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2680 = []
      __temp_2680.extend((1, 1, 1, 1))
      noise_6 = torch.empty(__temp_2680, dtype=torch.float32, device=device(type=
          'cpu'))
      noise_7 = noise_6.bernoulli_(0.9636363636363636)
      noise_6 = None
      div__3 = noise_7.div_(0.9636363636363636)
      mul_6 = getattr_getattr_l__self___features___3_____0___mlp_4 * noise_7
      getattr_getattr_l__self___features___3_____0___mlp_4 = None
      noise_7 = None
      x_39 = x_38 + mul_6
      x_38 = None
      mul_6 = None
      getattr_getattr_l__self___features___3_____1___norm1 = (self.
          getattr_getattr_L__self___features___3_____1___norm1(x_39))
      (
          getattr_getattr_l__self___features___3_____1___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___3_____1___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___3_____1___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___3_____1___attn_relative_position_index
          )
      relative_position_bias_12 = (
          getattr_getattr_l__self___features___3_____1___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___3_____1___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___3_____1___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___3_____1___attn_relative_position_index
          ) = None
      relative_position_bias_13 = relative_position_bias_12.view(49, 49, -1)
      relative_position_bias_12 = None
      permute_14 = relative_position_bias_13.permute(2, 0, 1)
      relative_position_bias_13 = None
      contiguous_6 = permute_14.contiguous()
      permute_14 = None
      relative_position_bias_15 = contiguous_6.unsqueeze(0)
      contiguous_6 = None
      getattr_getattr_l__self___features___3_____1___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___3_____1___attn_qkv_weight)
      getattr_getattr_l__self___features___3_____1___attn_proj_weight = (self.
          getattr_getattr_L__self___features___3_____1___attn_proj_weight)
      getattr_getattr_l__self___features___3_____1___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___3_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___3_____1___attn_proj_bias = (self.
          getattr_getattr_L__self___features___3_____1___attn_proj_bias)
      x_40 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___3_____1___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___3_____1___norm1 = None
      x_41 = torch.roll(x_40, shifts=(-3, -3), dims=(1, 2))
      x_40 = None
      x_42 = x_41.view(1, 4, 7, 4, 7, 192)
      x_41 = None
      permute_15 = x_42.permute(0, 1, 3, 2, 4, 5)
      x_42 = None
      x_43 = permute_15.reshape(16, 49, 192)
      permute_15 = None
      qkv_6 = torch._C._nn.linear(x_43,
          getattr_getattr_l__self___features___3_____1___attn_qkv_weight,
          getattr_getattr_l__self___features___3_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___3_____1___attn_qkv_weight = None
      getattr_getattr_l__self___features___3_____1___attn_qkv_bias = None
      reshape_14 = qkv_6.reshape(16, 49, 3, 6, 32)
      qkv_6 = None
      qkv_7 = reshape_14.permute(2, 0, 3, 1, 4)
      reshape_14 = None
      q_6 = qkv_7[0]
      k_3 = qkv_7[1]
      v_3 = qkv_7[2]
      qkv_7 = None
      q_7 = q_6 * 0.1767766952966369
      q_6 = None
      transpose_6 = k_3.transpose(-2, -1)
      k_3 = None
      attn_15 = q_7.matmul(transpose_6)
      q_7 = None
      transpose_6 = None
      attn_16 = attn_15 + relative_position_bias_15
      attn_15 = None
      relative_position_bias_15 = None
      attn_mask_5 = x_43.new_zeros((28, 28))
      x_43 = None
      attn_mask_5[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem_9 = attn_mask_5
      attn_mask_5[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_10 = attn_mask_5
      attn_mask_5[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_11 = attn_mask_5
      attn_mask_5[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_12 = attn_mask_5
      attn_mask_5[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_13 = attn_mask_5
      attn_mask_5[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_14 = attn_mask_5
      attn_mask_5[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_15 = attn_mask_5
      attn_mask_5[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_16 = attn_mask_5
      attn_mask_5[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_17 = attn_mask_5
      attn_mask_6 = attn_mask_5.view(4, 7, 4, 7)
      attn_mask_5 = None
      permute_17 = attn_mask_6.permute(0, 2, 1, 3)
      attn_mask_6 = None
      attn_mask_7 = permute_17.reshape(16, 49)
      permute_17 = None
      unsqueeze_8 = attn_mask_7.unsqueeze(1)
      unsqueeze_9 = attn_mask_7.unsqueeze(2)
      attn_mask_7 = None
      attn_mask_8 = unsqueeze_8 - unsqueeze_9
      unsqueeze_8 = None
      unsqueeze_9 = None
      ne_1 = attn_mask_8 != 0
      masked_fill_2 = attn_mask_8.masked_fill(ne_1, -100.0)
      ne_1 = None
      eq_1 = attn_mask_8 == 0
      attn_mask_8 = None
      attn_mask_9 = masked_fill_2.masked_fill(eq_1, 0.0)
      masked_fill_2 = None
      eq_1 = None
      attn_17 = attn_16.view(1, 16, 6, 49, 49)
      attn_16 = None
      unsqueeze_10 = attn_mask_9.unsqueeze(1)
      attn_mask_9 = None
      unsqueeze_11 = unsqueeze_10.unsqueeze(0)
      unsqueeze_10 = None
      attn_18 = attn_17 + unsqueeze_11
      attn_17 = None
      unsqueeze_11 = None
      attn_19 = attn_18.view(-1, 6, 49, 49)
      attn_18 = None
      attn_20 = torch.nn.functional.softmax(attn_19, dim=-1)
      attn_19 = None
      attn_21 = torch.nn.functional.dropout(attn_20, p=0.0, training=True)
      attn_20 = None
      matmul_7 = attn_21.matmul(v_3)
      attn_21 = None
      v_3 = None
      transpose_7 = matmul_7.transpose(1, 2)
      matmul_7 = None
      x_44 = transpose_7.reshape(16, 49, 192)
      transpose_7 = None
      x_45 = torch._C._nn.linear(x_44,
          getattr_getattr_l__self___features___3_____1___attn_proj_weight,
          getattr_getattr_l__self___features___3_____1___attn_proj_bias)
      x_44 = None
      getattr_getattr_l__self___features___3_____1___attn_proj_weight = None
      getattr_getattr_l__self___features___3_____1___attn_proj_bias = None
      x_46 = torch.nn.functional.dropout(x_45, p=0.0, training=True)
      x_45 = None
      x_47 = x_46.view(1, 4, 4, 7, 7, 192)
      x_46 = None
      permute_18 = x_47.permute(0, 1, 3, 2, 4, 5)
      x_47 = None
      x_48 = permute_18.reshape(1, 28, 28, 192)
      permute_18 = None
      x_49 = torch.roll(x_48, shifts=(3, 3), dims=(1, 2))
      x_48 = None
      getitem_23 = x_49[slice(None, None, None), slice(None, 28, None), slice(
          None, 28, None), slice(None, None, None)]
      x_49 = None
      x_50 = getitem_23.contiguous()
      getitem_23 = None
      _log_api_usage_once_6 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2748 = []
      __temp_2748.extend((1, 1, 1, 1))
      noise_8 = torch.empty(__temp_2748, dtype=torch.float32, device=device(type=
          'cpu'))
      noise_9 = noise_8.bernoulli_(0.9454545454545454)
      noise_8 = None
      div__4 = noise_9.div_(0.9454545454545454)
      mul_8 = x_50 * noise_9
      x_50 = None
      noise_9 = None
      x_51 = x_39 + mul_8
      x_39 = None
      mul_8 = None
      getattr_getattr_l__self___features___3_____1___norm2 = (self.
          getattr_getattr_L__self___features___3_____1___norm2(x_51))
      getattr_getattr_l__self___features___3_____1___mlp_0 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_0(
          getattr_getattr_l__self___features___3_____1___norm2))
      getattr_getattr_l__self___features___3_____1___norm2 = None
      getattr_getattr_l__self___features___3_____1___mlp_1 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_1(
          getattr_getattr_l__self___features___3_____1___mlp_0))
      getattr_getattr_l__self___features___3_____1___mlp_0 = None
      getattr_getattr_l__self___features___3_____1___mlp_2 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_2(
          getattr_getattr_l__self___features___3_____1___mlp_1))
      getattr_getattr_l__self___features___3_____1___mlp_1 = None
      getattr_getattr_l__self___features___3_____1___mlp_3 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_3(
          getattr_getattr_l__self___features___3_____1___mlp_2))
      getattr_getattr_l__self___features___3_____1___mlp_2 = None
      getattr_getattr_l__self___features___3_____1___mlp_4 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_4(
          getattr_getattr_l__self___features___3_____1___mlp_3))
      getattr_getattr_l__self___features___3_____1___mlp_3 = None
      _log_api_usage_once_7 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2761 = []
      __temp_2761.extend((1, 1, 1, 1))
      noise_10 = torch.empty(__temp_2761, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_11 = noise_10.bernoulli_(0.9454545454545454)
      noise_10 = None
      div__5 = noise_11.div_(0.9454545454545454)
      mul_9 = getattr_getattr_l__self___features___3_____1___mlp_4 * noise_11
      getattr_getattr_l__self___features___3_____1___mlp_4 = None
      noise_11 = None
      x_52 = x_51 + mul_9
      x_51 = None
      mul_9 = None
      x_53 = torch.nn.functional.pad(x_52, (0, 0, 0, 0, 0, 0))
      x_52 = None
      x0_1 = x_53[Ellipsis, slice(0, None, 2), slice(0, None, 2), slice(None,
          None, None)]
      x1_1 = x_53[Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None,
          None, None)]
      x2_1 = x_53[Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None,
          None, None)]
      x3_1 = x_53[Ellipsis, slice(1, None, 2), slice(1, None, 2), slice(None,
          None, None)]
      x_53 = None
      x_55 = torch.cat([x0_1, x1_1, x2_1, x3_1], -1)
      x0_1 = None
      x1_1 = None
      x2_1 = None
      x3_1 = None
      x_56 = self.getattr_L__self___features___4___norm(x_55)
      x_55 = None
      x_57 = self.getattr_L__self___features___4___reduction(x_56)
      x_56 = None
      getattr_getattr_l__self___features___5_____0___norm1 = (self.
          getattr_getattr_L__self___features___5_____0___norm1(x_57))
      (
          getattr_getattr_l__self___features___5_____0___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____0___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____0___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____0___attn_relative_position_index
          )
      relative_position_bias_16 = (
          getattr_getattr_l__self___features___5_____0___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____0___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____0___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____0___attn_relative_position_index
          ) = None
      relative_position_bias_17 = relative_position_bias_16.view(49, 49, -1)
      relative_position_bias_16 = None
      permute_19 = relative_position_bias_17.permute(2, 0, 1)
      relative_position_bias_17 = None
      contiguous_8 = permute_19.contiguous()
      permute_19 = None
      relative_position_bias_19 = contiguous_8.unsqueeze(0)
      contiguous_8 = None
      getattr_getattr_l__self___features___5_____0___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____0___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____0___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____0___attn_proj_weight)
      getattr_getattr_l__self___features___5_____0___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____0___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____0___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____0___attn_proj_bias)
      x_58 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____0___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____0___norm1 = None
      x_59 = x_58.view(1, 2, 7, 2, 7, 384)
      x_58 = None
      permute_20 = x_59.permute(0, 1, 3, 2, 4, 5)
      x_59 = None
      x_60 = permute_20.reshape(4, 49, 384)
      permute_20 = None
      qkv_8 = torch._C._nn.linear(x_60,
          getattr_getattr_l__self___features___5_____0___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____0___attn_qkv_bias)
      x_60 = None
      getattr_getattr_l__self___features___5_____0___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____0___attn_qkv_bias = None
      reshape_19 = qkv_8.reshape(4, 49, 3, 12, 32)
      qkv_8 = None
      qkv_9 = reshape_19.permute(2, 0, 3, 1, 4)
      reshape_19 = None
      q_8 = qkv_9[0]
      k_4 = qkv_9[1]
      v_4 = qkv_9[2]
      qkv_9 = None
      q_9 = q_8 * 0.1767766952966369
      q_8 = None
      transpose_8 = k_4.transpose(-2, -1)
      k_4 = None
      attn_22 = q_9.matmul(transpose_8)
      q_9 = None
      transpose_8 = None
      attn_23 = attn_22 + relative_position_bias_19
      attn_22 = None
      relative_position_bias_19 = None
      attn_24 = torch.nn.functional.softmax(attn_23, dim=-1)
      attn_23 = None
      attn_25 = torch.nn.functional.dropout(attn_24, p=0.0, training=True)
      attn_24 = None
      matmul_9 = attn_25.matmul(v_4)
      attn_25 = None
      v_4 = None
      transpose_9 = matmul_9.transpose(1, 2)
      matmul_9 = None
      x_61 = transpose_9.reshape(4, 49, 384)
      transpose_9 = None
      x_62 = torch._C._nn.linear(x_61,
          getattr_getattr_l__self___features___5_____0___attn_proj_weight,
          getattr_getattr_l__self___features___5_____0___attn_proj_bias)
      x_61 = None
      getattr_getattr_l__self___features___5_____0___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____0___attn_proj_bias = None
      x_63 = torch.nn.functional.dropout(x_62, p=0.0, training=True)
      x_62 = None
      x_64 = x_63.view(1, 2, 2, 7, 7, 384)
      x_63 = None
      permute_22 = x_64.permute(0, 1, 3, 2, 4, 5)
      x_64 = None
      x_65 = permute_22.reshape(1, 14, 14, 384)
      permute_22 = None
      getitem_32 = x_65[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_65 = None
      x_66 = getitem_32.contiguous()
      getitem_32 = None
      _log_api_usage_once_8 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2814 = []
      __temp_2814.extend((1, 1, 1, 1))
      noise_12 = torch.empty(__temp_2814, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_13 = noise_12.bernoulli_(0.9272727272727272)
      noise_12 = None
      div__6 = noise_13.div_(0.9272727272727272)
      mul_11 = x_66 * noise_13
      x_66 = None
      noise_13 = None
      x_67 = x_57 + mul_11
      x_57 = None
      mul_11 = None
      getattr_getattr_l__self___features___5_____0___norm2 = (self.
          getattr_getattr_L__self___features___5_____0___norm2(x_67))
      getattr_getattr_l__self___features___5_____0___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_0(
          getattr_getattr_l__self___features___5_____0___norm2))
      getattr_getattr_l__self___features___5_____0___norm2 = None
      getattr_getattr_l__self___features___5_____0___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_1(
          getattr_getattr_l__self___features___5_____0___mlp_0))
      getattr_getattr_l__self___features___5_____0___mlp_0 = None
      getattr_getattr_l__self___features___5_____0___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_2(
          getattr_getattr_l__self___features___5_____0___mlp_1))
      getattr_getattr_l__self___features___5_____0___mlp_1 = None
      getattr_getattr_l__self___features___5_____0___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_3(
          getattr_getattr_l__self___features___5_____0___mlp_2))
      getattr_getattr_l__self___features___5_____0___mlp_2 = None
      getattr_getattr_l__self___features___5_____0___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_4(
          getattr_getattr_l__self___features___5_____0___mlp_3))
      getattr_getattr_l__self___features___5_____0___mlp_3 = None
      _log_api_usage_once_9 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2827 = []
      __temp_2827.extend((1, 1, 1, 1))
      noise_14 = torch.empty(__temp_2827, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_15 = noise_14.bernoulli_(0.9272727272727272)
      noise_14 = None
      div__7 = noise_15.div_(0.9272727272727272)
      mul_12 = getattr_getattr_l__self___features___5_____0___mlp_4 * noise_15
      getattr_getattr_l__self___features___5_____0___mlp_4 = None
      noise_15 = None
      x_68 = x_67 + mul_12
      x_67 = None
      mul_12 = None
      getattr_getattr_l__self___features___5_____1___norm1 = (self.
          getattr_getattr_L__self___features___5_____1___norm1(x_68))
      (
          getattr_getattr_l__self___features___5_____1___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____1___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____1___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____1___attn_relative_position_index
          )
      relative_position_bias_20 = (
          getattr_getattr_l__self___features___5_____1___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____1___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____1___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____1___attn_relative_position_index
          ) = None
      relative_position_bias_21 = relative_position_bias_20.view(49, 49, -1)
      relative_position_bias_20 = None
      permute_23 = relative_position_bias_21.permute(2, 0, 1)
      relative_position_bias_21 = None
      contiguous_10 = permute_23.contiguous()
      permute_23 = None
      relative_position_bias_23 = contiguous_10.unsqueeze(0)
      contiguous_10 = None
      getattr_getattr_l__self___features___5_____1___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____1___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____1___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____1___attn_proj_weight)
      getattr_getattr_l__self___features___5_____1___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____1___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____1___attn_proj_bias)
      x_69 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____1___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____1___norm1 = None
      x_70 = torch.roll(x_69, shifts=(-3, -3), dims=(1, 2))
      x_69 = None
      x_71 = x_70.view(1, 2, 7, 2, 7, 384)
      x_70 = None
      permute_24 = x_71.permute(0, 1, 3, 2, 4, 5)
      x_71 = None
      x_72 = permute_24.reshape(4, 49, 384)
      permute_24 = None
      qkv_10 = torch._C._nn.linear(x_72,
          getattr_getattr_l__self___features___5_____1___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____1___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____1___attn_qkv_bias = None
      reshape_23 = qkv_10.reshape(4, 49, 3, 12, 32)
      qkv_10 = None
      qkv_11 = reshape_23.permute(2, 0, 3, 1, 4)
      reshape_23 = None
      q_10 = qkv_11[0]
      k_5 = qkv_11[1]
      v_5 = qkv_11[2]
      qkv_11 = None
      q_11 = q_10 * 0.1767766952966369
      q_10 = None
      transpose_10 = k_5.transpose(-2, -1)
      k_5 = None
      attn_26 = q_11.matmul(transpose_10)
      q_11 = None
      transpose_10 = None
      attn_27 = attn_26 + relative_position_bias_23
      attn_26 = None
      relative_position_bias_23 = None
      attn_mask_10 = x_72.new_zeros((14, 14))
      x_72 = None
      attn_mask_10[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem_18 = attn_mask_10
      attn_mask_10[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_19 = attn_mask_10
      attn_mask_10[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_20 = attn_mask_10
      attn_mask_10[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_21 = attn_mask_10
      attn_mask_10[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_22 = attn_mask_10
      attn_mask_10[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_23 = attn_mask_10
      attn_mask_10[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_24 = attn_mask_10
      attn_mask_10[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_25 = attn_mask_10
      attn_mask_10[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_26 = attn_mask_10
      attn_mask_11 = attn_mask_10.view(2, 7, 2, 7)
      attn_mask_10 = None
      permute_26 = attn_mask_11.permute(0, 2, 1, 3)
      attn_mask_11 = None
      attn_mask_12 = permute_26.reshape(4, 49)
      permute_26 = None
      unsqueeze_14 = attn_mask_12.unsqueeze(1)
      unsqueeze_15 = attn_mask_12.unsqueeze(2)
      attn_mask_12 = None
      attn_mask_13 = unsqueeze_14 - unsqueeze_15
      unsqueeze_14 = None
      unsqueeze_15 = None
      ne_2 = attn_mask_13 != 0
      masked_fill_4 = attn_mask_13.masked_fill(ne_2, -100.0)
      ne_2 = None
      eq_2 = attn_mask_13 == 0
      attn_mask_13 = None
      attn_mask_14 = masked_fill_4.masked_fill(eq_2, 0.0)
      masked_fill_4 = None
      eq_2 = None
      attn_28 = attn_27.view(1, 4, 12, 49, 49)
      attn_27 = None
      unsqueeze_16 = attn_mask_14.unsqueeze(1)
      attn_mask_14 = None
      unsqueeze_17 = unsqueeze_16.unsqueeze(0)
      unsqueeze_16 = None
      attn_29 = attn_28 + unsqueeze_17
      attn_28 = None
      unsqueeze_17 = None
      attn_30 = attn_29.view(-1, 12, 49, 49)
      attn_29 = None
      attn_31 = torch.nn.functional.softmax(attn_30, dim=-1)
      attn_30 = None
      attn_32 = torch.nn.functional.dropout(attn_31, p=0.0, training=True)
      attn_31 = None
      matmul_11 = attn_32.matmul(v_5)
      attn_32 = None
      v_5 = None
      transpose_11 = matmul_11.transpose(1, 2)
      matmul_11 = None
      x_73 = transpose_11.reshape(4, 49, 384)
      transpose_11 = None
      x_74 = torch._C._nn.linear(x_73,
          getattr_getattr_l__self___features___5_____1___attn_proj_weight,
          getattr_getattr_l__self___features___5_____1___attn_proj_bias)
      x_73 = None
      getattr_getattr_l__self___features___5_____1___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____1___attn_proj_bias = None
      x_75 = torch.nn.functional.dropout(x_74, p=0.0, training=True)
      x_74 = None
      x_76 = x_75.view(1, 2, 2, 7, 7, 384)
      x_75 = None
      permute_27 = x_76.permute(0, 1, 3, 2, 4, 5)
      x_76 = None
      x_77 = permute_27.reshape(1, 14, 14, 384)
      permute_27 = None
      x_78 = torch.roll(x_77, shifts=(3, 3), dims=(1, 2))
      x_77 = None
      getitem_37 = x_78[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_78 = None
      x_79 = getitem_37.contiguous()
      getitem_37 = None
      _log_api_usage_once_10 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2895 = []
      __temp_2895.extend((1, 1, 1, 1))
      noise_16 = torch.empty(__temp_2895, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_17 = noise_16.bernoulli_(0.9090909090909091)
      noise_16 = None
      div__8 = noise_17.div_(0.9090909090909091)
      mul_14 = x_79 * noise_17
      x_79 = None
      noise_17 = None
      x_80 = x_68 + mul_14
      x_68 = None
      mul_14 = None
      getattr_getattr_l__self___features___5_____1___norm2 = (self.
          getattr_getattr_L__self___features___5_____1___norm2(x_80))
      getattr_getattr_l__self___features___5_____1___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_0(
          getattr_getattr_l__self___features___5_____1___norm2))
      getattr_getattr_l__self___features___5_____1___norm2 = None
      getattr_getattr_l__self___features___5_____1___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_1(
          getattr_getattr_l__self___features___5_____1___mlp_0))
      getattr_getattr_l__self___features___5_____1___mlp_0 = None
      getattr_getattr_l__self___features___5_____1___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_2(
          getattr_getattr_l__self___features___5_____1___mlp_1))
      getattr_getattr_l__self___features___5_____1___mlp_1 = None
      getattr_getattr_l__self___features___5_____1___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_3(
          getattr_getattr_l__self___features___5_____1___mlp_2))
      getattr_getattr_l__self___features___5_____1___mlp_2 = None
      getattr_getattr_l__self___features___5_____1___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_4(
          getattr_getattr_l__self___features___5_____1___mlp_3))
      getattr_getattr_l__self___features___5_____1___mlp_3 = None
      _log_api_usage_once_11 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2908 = []
      __temp_2908.extend((1, 1, 1, 1))
      noise_18 = torch.empty(__temp_2908, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_19 = noise_18.bernoulli_(0.9090909090909091)
      noise_18 = None
      div__9 = noise_19.div_(0.9090909090909091)
      mul_15 = getattr_getattr_l__self___features___5_____1___mlp_4 * noise_19
      getattr_getattr_l__self___features___5_____1___mlp_4 = None
      noise_19 = None
      x_81 = x_80 + mul_15
      x_80 = None
      mul_15 = None
      getattr_getattr_l__self___features___5_____2___norm1 = (self.
          getattr_getattr_L__self___features___5_____2___norm1(x_81))
      (
          getattr_getattr_l__self___features___5_____2___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____2___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____2___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____2___attn_relative_position_index
          )
      relative_position_bias_24 = (
          getattr_getattr_l__self___features___5_____2___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____2___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____2___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____2___attn_relative_position_index
          ) = None
      relative_position_bias_25 = relative_position_bias_24.view(49, 49, -1)
      relative_position_bias_24 = None
      permute_28 = relative_position_bias_25.permute(2, 0, 1)
      relative_position_bias_25 = None
      contiguous_12 = permute_28.contiguous()
      permute_28 = None
      relative_position_bias_27 = contiguous_12.unsqueeze(0)
      contiguous_12 = None
      getattr_getattr_l__self___features___5_____2___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____2___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____2___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____2___attn_proj_weight)
      getattr_getattr_l__self___features___5_____2___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____2___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____2___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____2___attn_proj_bias)
      x_82 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____2___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____2___norm1 = None
      x_83 = x_82.view(1, 2, 7, 2, 7, 384)
      x_82 = None
      permute_29 = x_83.permute(0, 1, 3, 2, 4, 5)
      x_83 = None
      x_84 = permute_29.reshape(4, 49, 384)
      permute_29 = None
      qkv_12 = torch._C._nn.linear(x_84,
          getattr_getattr_l__self___features___5_____2___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____2___attn_qkv_bias)
      x_84 = None
      getattr_getattr_l__self___features___5_____2___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____2___attn_qkv_bias = None
      reshape_28 = qkv_12.reshape(4, 49, 3, 12, 32)
      qkv_12 = None
      qkv_13 = reshape_28.permute(2, 0, 3, 1, 4)
      reshape_28 = None
      q_12 = qkv_13[0]
      k_6 = qkv_13[1]
      v_6 = qkv_13[2]
      qkv_13 = None
      q_13 = q_12 * 0.1767766952966369
      q_12 = None
      transpose_12 = k_6.transpose(-2, -1)
      k_6 = None
      attn_33 = q_13.matmul(transpose_12)
      q_13 = None
      transpose_12 = None
      attn_34 = attn_33 + relative_position_bias_27
      attn_33 = None
      relative_position_bias_27 = None
      attn_35 = torch.nn.functional.softmax(attn_34, dim=-1)
      attn_34 = None
      attn_36 = torch.nn.functional.dropout(attn_35, p=0.0, training=True)
      attn_35 = None
      matmul_13 = attn_36.matmul(v_6)
      attn_36 = None
      v_6 = None
      transpose_13 = matmul_13.transpose(1, 2)
      matmul_13 = None
      x_85 = transpose_13.reshape(4, 49, 384)
      transpose_13 = None
      x_86 = torch._C._nn.linear(x_85,
          getattr_getattr_l__self___features___5_____2___attn_proj_weight,
          getattr_getattr_l__self___features___5_____2___attn_proj_bias)
      x_85 = None
      getattr_getattr_l__self___features___5_____2___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____2___attn_proj_bias = None
      x_87 = torch.nn.functional.dropout(x_86, p=0.0, training=True)
      x_86 = None
      x_88 = x_87.view(1, 2, 2, 7, 7, 384)
      x_87 = None
      permute_31 = x_88.permute(0, 1, 3, 2, 4, 5)
      x_88 = None
      x_89 = permute_31.reshape(1, 14, 14, 384)
      permute_31 = None
      getitem_42 = x_89[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_89 = None
      x_90 = getitem_42.contiguous()
      getitem_42 = None
      _log_api_usage_once_12 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2944 = []
      __temp_2944.extend((1, 1, 1, 1))
      noise_20 = torch.empty(__temp_2944, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_21 = noise_20.bernoulli_(0.8909090909090909)
      noise_20 = None
      div__10 = noise_21.div_(0.8909090909090909)
      mul_17 = x_90 * noise_21
      x_90 = None
      noise_21 = None
      x_91 = x_81 + mul_17
      x_81 = None
      mul_17 = None
      getattr_getattr_l__self___features___5_____2___norm2 = (self.
          getattr_getattr_L__self___features___5_____2___norm2(x_91))
      getattr_getattr_l__self___features___5_____2___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_0(
          getattr_getattr_l__self___features___5_____2___norm2))
      getattr_getattr_l__self___features___5_____2___norm2 = None
      getattr_getattr_l__self___features___5_____2___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_1(
          getattr_getattr_l__self___features___5_____2___mlp_0))
      getattr_getattr_l__self___features___5_____2___mlp_0 = None
      getattr_getattr_l__self___features___5_____2___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_2(
          getattr_getattr_l__self___features___5_____2___mlp_1))
      getattr_getattr_l__self___features___5_____2___mlp_1 = None
      getattr_getattr_l__self___features___5_____2___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_3(
          getattr_getattr_l__self___features___5_____2___mlp_2))
      getattr_getattr_l__self___features___5_____2___mlp_2 = None
      getattr_getattr_l__self___features___5_____2___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_4(
          getattr_getattr_l__self___features___5_____2___mlp_3))
      getattr_getattr_l__self___features___5_____2___mlp_3 = None
      _log_api_usage_once_13 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2957 = []
      __temp_2957.extend((1, 1, 1, 1))
      noise_22 = torch.empty(__temp_2957, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_23 = noise_22.bernoulli_(0.8909090909090909)
      noise_22 = None
      div__11 = noise_23.div_(0.8909090909090909)
      mul_18 = getattr_getattr_l__self___features___5_____2___mlp_4 * noise_23
      getattr_getattr_l__self___features___5_____2___mlp_4 = None
      noise_23 = None
      x_92 = x_91 + mul_18
      x_91 = None
      mul_18 = None
      getattr_getattr_l__self___features___5_____3___norm1 = (self.
          getattr_getattr_L__self___features___5_____3___norm1(x_92))
      (
          getattr_getattr_l__self___features___5_____3___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____3___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____3___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____3___attn_relative_position_index
          )
      relative_position_bias_28 = (
          getattr_getattr_l__self___features___5_____3___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____3___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____3___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____3___attn_relative_position_index
          ) = None
      relative_position_bias_29 = relative_position_bias_28.view(49, 49, -1)
      relative_position_bias_28 = None
      permute_32 = relative_position_bias_29.permute(2, 0, 1)
      relative_position_bias_29 = None
      contiguous_14 = permute_32.contiguous()
      permute_32 = None
      relative_position_bias_31 = contiguous_14.unsqueeze(0)
      contiguous_14 = None
      getattr_getattr_l__self___features___5_____3___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____3___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____3___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____3___attn_proj_weight)
      getattr_getattr_l__self___features___5_____3___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____3___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____3___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____3___attn_proj_bias)
      x_93 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____3___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____3___norm1 = None
      x_94 = torch.roll(x_93, shifts=(-3, -3), dims=(1, 2))
      x_93 = None
      x_95 = x_94.view(1, 2, 7, 2, 7, 384)
      x_94 = None
      permute_33 = x_95.permute(0, 1, 3, 2, 4, 5)
      x_95 = None
      x_96 = permute_33.reshape(4, 49, 384)
      permute_33 = None
      qkv_14 = torch._C._nn.linear(x_96,
          getattr_getattr_l__self___features___5_____3___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____3___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____3___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____3___attn_qkv_bias = None
      reshape_32 = qkv_14.reshape(4, 49, 3, 12, 32)
      qkv_14 = None
      qkv_15 = reshape_32.permute(2, 0, 3, 1, 4)
      reshape_32 = None
      q_14 = qkv_15[0]
      k_7 = qkv_15[1]
      v_7 = qkv_15[2]
      qkv_15 = None
      q_15 = q_14 * 0.1767766952966369
      q_14 = None
      transpose_14 = k_7.transpose(-2, -1)
      k_7 = None
      attn_37 = q_15.matmul(transpose_14)
      q_15 = None
      transpose_14 = None
      attn_38 = attn_37 + relative_position_bias_31
      attn_37 = None
      relative_position_bias_31 = None
      attn_mask_15 = x_96.new_zeros((14, 14))
      x_96 = None
      attn_mask_15[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem_27 = attn_mask_15
      attn_mask_15[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_28 = attn_mask_15
      attn_mask_15[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_29 = attn_mask_15
      attn_mask_15[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_30 = attn_mask_15
      attn_mask_15[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_31 = attn_mask_15
      attn_mask_15[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_32 = attn_mask_15
      attn_mask_15[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_33 = attn_mask_15
      attn_mask_15[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_34 = attn_mask_15
      attn_mask_15[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_35 = attn_mask_15
      attn_mask_16 = attn_mask_15.view(2, 7, 2, 7)
      attn_mask_15 = None
      permute_35 = attn_mask_16.permute(0, 2, 1, 3)
      attn_mask_16 = None
      attn_mask_17 = permute_35.reshape(4, 49)
      permute_35 = None
      unsqueeze_20 = attn_mask_17.unsqueeze(1)
      unsqueeze_21 = attn_mask_17.unsqueeze(2)
      attn_mask_17 = None
      attn_mask_18 = unsqueeze_20 - unsqueeze_21
      unsqueeze_20 = None
      unsqueeze_21 = None
      ne_3 = attn_mask_18 != 0
      masked_fill_6 = attn_mask_18.masked_fill(ne_3, -100.0)
      ne_3 = None
      eq_3 = attn_mask_18 == 0
      attn_mask_18 = None
      attn_mask_19 = masked_fill_6.masked_fill(eq_3, 0.0)
      masked_fill_6 = None
      eq_3 = None
      attn_39 = attn_38.view(1, 4, 12, 49, 49)
      attn_38 = None
      unsqueeze_22 = attn_mask_19.unsqueeze(1)
      attn_mask_19 = None
      unsqueeze_23 = unsqueeze_22.unsqueeze(0)
      unsqueeze_22 = None
      attn_40 = attn_39 + unsqueeze_23
      attn_39 = None
      unsqueeze_23 = None
      attn_41 = attn_40.view(-1, 12, 49, 49)
      attn_40 = None
      attn_42 = torch.nn.functional.softmax(attn_41, dim=-1)
      attn_41 = None
      attn_43 = torch.nn.functional.dropout(attn_42, p=0.0, training=True)
      attn_42 = None
      matmul_15 = attn_43.matmul(v_7)
      attn_43 = None
      v_7 = None
      transpose_15 = matmul_15.transpose(1, 2)
      matmul_15 = None
      x_97 = transpose_15.reshape(4, 49, 384)
      transpose_15 = None
      x_98 = torch._C._nn.linear(x_97,
          getattr_getattr_l__self___features___5_____3___attn_proj_weight,
          getattr_getattr_l__self___features___5_____3___attn_proj_bias)
      x_97 = None
      getattr_getattr_l__self___features___5_____3___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____3___attn_proj_bias = None
      x_99 = torch.nn.functional.dropout(x_98, p=0.0, training=True)
      x_98 = None
      x_100 = x_99.view(1, 2, 2, 7, 7, 384)
      x_99 = None
      permute_36 = x_100.permute(0, 1, 3, 2, 4, 5)
      x_100 = None
      x_101 = permute_36.reshape(1, 14, 14, 384)
      permute_36 = None
      x_102 = torch.roll(x_101, shifts=(3, 3), dims=(1, 2))
      x_101 = None
      getitem_47 = x_102[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_102 = None
      x_103 = getitem_47.contiguous()
      getitem_47 = None
      _log_api_usage_once_14 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3025 = []
      __temp_3025.extend((1, 1, 1, 1))
      noise_24 = torch.empty(__temp_3025, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_25 = noise_24.bernoulli_(0.8727272727272727)
      noise_24 = None
      div__12 = noise_25.div_(0.8727272727272727)
      mul_20 = x_103 * noise_25
      x_103 = None
      noise_25 = None
      x_104 = x_92 + mul_20
      x_92 = None
      mul_20 = None
      getattr_getattr_l__self___features___5_____3___norm2 = (self.
          getattr_getattr_L__self___features___5_____3___norm2(x_104))
      getattr_getattr_l__self___features___5_____3___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_0(
          getattr_getattr_l__self___features___5_____3___norm2))
      getattr_getattr_l__self___features___5_____3___norm2 = None
      getattr_getattr_l__self___features___5_____3___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_1(
          getattr_getattr_l__self___features___5_____3___mlp_0))
      getattr_getattr_l__self___features___5_____3___mlp_0 = None
      getattr_getattr_l__self___features___5_____3___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_2(
          getattr_getattr_l__self___features___5_____3___mlp_1))
      getattr_getattr_l__self___features___5_____3___mlp_1 = None
      getattr_getattr_l__self___features___5_____3___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_3(
          getattr_getattr_l__self___features___5_____3___mlp_2))
      getattr_getattr_l__self___features___5_____3___mlp_2 = None
      getattr_getattr_l__self___features___5_____3___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_4(
          getattr_getattr_l__self___features___5_____3___mlp_3))
      getattr_getattr_l__self___features___5_____3___mlp_3 = None
      _log_api_usage_once_15 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3038 = []
      __temp_3038.extend((1, 1, 1, 1))
      noise_26 = torch.empty(__temp_3038, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_27 = noise_26.bernoulli_(0.8727272727272727)
      noise_26 = None
      div__13 = noise_27.div_(0.8727272727272727)
      mul_21 = getattr_getattr_l__self___features___5_____3___mlp_4 * noise_27
      getattr_getattr_l__self___features___5_____3___mlp_4 = None
      noise_27 = None
      x_105 = x_104 + mul_21
      x_104 = None
      mul_21 = None
      getattr_getattr_l__self___features___5_____4___norm1 = (self.
          getattr_getattr_L__self___features___5_____4___norm1(x_105))
      (
          getattr_getattr_l__self___features___5_____4___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____4___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____4___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____4___attn_relative_position_index
          )
      relative_position_bias_32 = (
          getattr_getattr_l__self___features___5_____4___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____4___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____4___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____4___attn_relative_position_index
          ) = None
      relative_position_bias_33 = relative_position_bias_32.view(49, 49, -1)
      relative_position_bias_32 = None
      permute_37 = relative_position_bias_33.permute(2, 0, 1)
      relative_position_bias_33 = None
      contiguous_16 = permute_37.contiguous()
      permute_37 = None
      relative_position_bias_35 = contiguous_16.unsqueeze(0)
      contiguous_16 = None
      getattr_getattr_l__self___features___5_____4___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____4___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____4___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____4___attn_proj_weight)
      getattr_getattr_l__self___features___5_____4___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____4___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____4___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____4___attn_proj_bias)
      x_106 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____4___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____4___norm1 = None
      x_107 = x_106.view(1, 2, 7, 2, 7, 384)
      x_106 = None
      permute_38 = x_107.permute(0, 1, 3, 2, 4, 5)
      x_107 = None
      x_108 = permute_38.reshape(4, 49, 384)
      permute_38 = None
      qkv_16 = torch._C._nn.linear(x_108,
          getattr_getattr_l__self___features___5_____4___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____4___attn_qkv_bias)
      x_108 = None
      getattr_getattr_l__self___features___5_____4___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____4___attn_qkv_bias = None
      reshape_37 = qkv_16.reshape(4, 49, 3, 12, 32)
      qkv_16 = None
      qkv_17 = reshape_37.permute(2, 0, 3, 1, 4)
      reshape_37 = None
      q_16 = qkv_17[0]
      k_8 = qkv_17[1]
      v_8 = qkv_17[2]
      qkv_17 = None
      q_17 = q_16 * 0.1767766952966369
      q_16 = None
      transpose_16 = k_8.transpose(-2, -1)
      k_8 = None
      attn_44 = q_17.matmul(transpose_16)
      q_17 = None
      transpose_16 = None
      attn_45 = attn_44 + relative_position_bias_35
      attn_44 = None
      relative_position_bias_35 = None
      attn_46 = torch.nn.functional.softmax(attn_45, dim=-1)
      attn_45 = None
      attn_47 = torch.nn.functional.dropout(attn_46, p=0.0, training=True)
      attn_46 = None
      matmul_17 = attn_47.matmul(v_8)
      attn_47 = None
      v_8 = None
      transpose_17 = matmul_17.transpose(1, 2)
      matmul_17 = None
      x_109 = transpose_17.reshape(4, 49, 384)
      transpose_17 = None
      x_110 = torch._C._nn.linear(x_109,
          getattr_getattr_l__self___features___5_____4___attn_proj_weight,
          getattr_getattr_l__self___features___5_____4___attn_proj_bias)
      x_109 = None
      getattr_getattr_l__self___features___5_____4___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____4___attn_proj_bias = None
      x_111 = torch.nn.functional.dropout(x_110, p=0.0, training=True)
      x_110 = None
      x_112 = x_111.view(1, 2, 2, 7, 7, 384)
      x_111 = None
      permute_40 = x_112.permute(0, 1, 3, 2, 4, 5)
      x_112 = None
      x_113 = permute_40.reshape(1, 14, 14, 384)
      permute_40 = None
      getitem_52 = x_113[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_113 = None
      x_114 = getitem_52.contiguous()
      getitem_52 = None
      _log_api_usage_once_16 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3074 = []
      __temp_3074.extend((1, 1, 1, 1))
      noise_28 = torch.empty(__temp_3074, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_29 = noise_28.bernoulli_(0.8545454545454545)
      noise_28 = None
      div__14 = noise_29.div_(0.8545454545454545)
      mul_23 = x_114 * noise_29
      x_114 = None
      noise_29 = None
      x_115 = x_105 + mul_23
      x_105 = None
      mul_23 = None
      getattr_getattr_l__self___features___5_____4___norm2 = (self.
          getattr_getattr_L__self___features___5_____4___norm2(x_115))
      getattr_getattr_l__self___features___5_____4___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_0(
          getattr_getattr_l__self___features___5_____4___norm2))
      getattr_getattr_l__self___features___5_____4___norm2 = None
      getattr_getattr_l__self___features___5_____4___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_1(
          getattr_getattr_l__self___features___5_____4___mlp_0))
      getattr_getattr_l__self___features___5_____4___mlp_0 = None
      getattr_getattr_l__self___features___5_____4___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_2(
          getattr_getattr_l__self___features___5_____4___mlp_1))
      getattr_getattr_l__self___features___5_____4___mlp_1 = None
      getattr_getattr_l__self___features___5_____4___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_3(
          getattr_getattr_l__self___features___5_____4___mlp_2))
      getattr_getattr_l__self___features___5_____4___mlp_2 = None
      getattr_getattr_l__self___features___5_____4___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_4(
          getattr_getattr_l__self___features___5_____4___mlp_3))
      getattr_getattr_l__self___features___5_____4___mlp_3 = None
      _log_api_usage_once_17 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3087 = []
      __temp_3087.extend((1, 1, 1, 1))
      noise_30 = torch.empty(__temp_3087, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_31 = noise_30.bernoulli_(0.8545454545454545)
      noise_30 = None
      div__15 = noise_31.div_(0.8545454545454545)
      mul_24 = getattr_getattr_l__self___features___5_____4___mlp_4 * noise_31
      getattr_getattr_l__self___features___5_____4___mlp_4 = None
      noise_31 = None
      x_116 = x_115 + mul_24
      x_115 = None
      mul_24 = None
      getattr_getattr_l__self___features___5_____5___norm1 = (self.
          getattr_getattr_L__self___features___5_____5___norm1(x_116))
      (
          getattr_getattr_l__self___features___5_____5___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____5___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____5___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____5___attn_relative_position_index
          )
      relative_position_bias_36 = (
          getattr_getattr_l__self___features___5_____5___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____5___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____5___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____5___attn_relative_position_index
          ) = None
      relative_position_bias_37 = relative_position_bias_36.view(49, 49, -1)
      relative_position_bias_36 = None
      permute_41 = relative_position_bias_37.permute(2, 0, 1)
      relative_position_bias_37 = None
      contiguous_18 = permute_41.contiguous()
      permute_41 = None
      relative_position_bias_39 = contiguous_18.unsqueeze(0)
      contiguous_18 = None
      getattr_getattr_l__self___features___5_____5___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____5___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____5___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____5___attn_proj_weight)
      getattr_getattr_l__self___features___5_____5___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____5___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____5___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____5___attn_proj_bias)
      x_117 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____5___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____5___norm1 = None
      x_118 = torch.roll(x_117, shifts=(-3, -3), dims=(1, 2))
      x_117 = None
      x_119 = x_118.view(1, 2, 7, 2, 7, 384)
      x_118 = None
      permute_42 = x_119.permute(0, 1, 3, 2, 4, 5)
      x_119 = None
      x_120 = permute_42.reshape(4, 49, 384)
      permute_42 = None
      qkv_18 = torch._C._nn.linear(x_120,
          getattr_getattr_l__self___features___5_____5___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____5___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____5___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____5___attn_qkv_bias = None
      reshape_41 = qkv_18.reshape(4, 49, 3, 12, 32)
      qkv_18 = None
      qkv_19 = reshape_41.permute(2, 0, 3, 1, 4)
      reshape_41 = None
      q_18 = qkv_19[0]
      k_9 = qkv_19[1]
      v_9 = qkv_19[2]
      qkv_19 = None
      q_19 = q_18 * 0.1767766952966369
      q_18 = None
      transpose_18 = k_9.transpose(-2, -1)
      k_9 = None
      attn_48 = q_19.matmul(transpose_18)
      q_19 = None
      transpose_18 = None
      attn_49 = attn_48 + relative_position_bias_39
      attn_48 = None
      relative_position_bias_39 = None
      attn_mask_20 = x_120.new_zeros((14, 14))
      x_120 = None
      attn_mask_20[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem_36 = attn_mask_20
      attn_mask_20[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_37 = attn_mask_20
      attn_mask_20[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_38 = attn_mask_20
      attn_mask_20[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_39 = attn_mask_20
      attn_mask_20[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_40 = attn_mask_20
      attn_mask_20[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_41 = attn_mask_20
      attn_mask_20[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_42 = attn_mask_20
      attn_mask_20[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_43 = attn_mask_20
      attn_mask_20[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_44 = attn_mask_20
      attn_mask_21 = attn_mask_20.view(2, 7, 2, 7)
      attn_mask_20 = None
      permute_44 = attn_mask_21.permute(0, 2, 1, 3)
      attn_mask_21 = None
      attn_mask_22 = permute_44.reshape(4, 49)
      permute_44 = None
      unsqueeze_26 = attn_mask_22.unsqueeze(1)
      unsqueeze_27 = attn_mask_22.unsqueeze(2)
      attn_mask_22 = None
      attn_mask_23 = unsqueeze_26 - unsqueeze_27
      unsqueeze_26 = None
      unsqueeze_27 = None
      ne_4 = attn_mask_23 != 0
      masked_fill_8 = attn_mask_23.masked_fill(ne_4, -100.0)
      ne_4 = None
      eq_4 = attn_mask_23 == 0
      attn_mask_23 = None
      attn_mask_24 = masked_fill_8.masked_fill(eq_4, 0.0)
      masked_fill_8 = None
      eq_4 = None
      attn_50 = attn_49.view(1, 4, 12, 49, 49)
      attn_49 = None
      unsqueeze_28 = attn_mask_24.unsqueeze(1)
      attn_mask_24 = None
      unsqueeze_29 = unsqueeze_28.unsqueeze(0)
      unsqueeze_28 = None
      attn_51 = attn_50 + unsqueeze_29
      attn_50 = None
      unsqueeze_29 = None
      attn_52 = attn_51.view(-1, 12, 49, 49)
      attn_51 = None
      attn_53 = torch.nn.functional.softmax(attn_52, dim=-1)
      attn_52 = None
      attn_54 = torch.nn.functional.dropout(attn_53, p=0.0, training=True)
      attn_53 = None
      matmul_19 = attn_54.matmul(v_9)
      attn_54 = None
      v_9 = None
      transpose_19 = matmul_19.transpose(1, 2)
      matmul_19 = None
      x_121 = transpose_19.reshape(4, 49, 384)
      transpose_19 = None
      x_122 = torch._C._nn.linear(x_121,
          getattr_getattr_l__self___features___5_____5___attn_proj_weight,
          getattr_getattr_l__self___features___5_____5___attn_proj_bias)
      x_121 = None
      getattr_getattr_l__self___features___5_____5___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____5___attn_proj_bias = None
      x_123 = torch.nn.functional.dropout(x_122, p=0.0, training=True)
      x_122 = None
      x_124 = x_123.view(1, 2, 2, 7, 7, 384)
      x_123 = None
      permute_45 = x_124.permute(0, 1, 3, 2, 4, 5)
      x_124 = None
      x_125 = permute_45.reshape(1, 14, 14, 384)
      permute_45 = None
      x_126 = torch.roll(x_125, shifts=(3, 3), dims=(1, 2))
      x_125 = None
      getitem_57 = x_126[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_126 = None
      x_127 = getitem_57.contiguous()
      getitem_57 = None
      _log_api_usage_once_18 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3155 = []
      __temp_3155.extend((1, 1, 1, 1))
      noise_32 = torch.empty(__temp_3155, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_33 = noise_32.bernoulli_(0.8363636363636364)
      noise_32 = None
      div__16 = noise_33.div_(0.8363636363636364)
      mul_26 = x_127 * noise_33
      x_127 = None
      noise_33 = None
      x_128 = x_116 + mul_26
      x_116 = None
      mul_26 = None
      getattr_getattr_l__self___features___5_____5___norm2 = (self.
          getattr_getattr_L__self___features___5_____5___norm2(x_128))
      getattr_getattr_l__self___features___5_____5___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_0(
          getattr_getattr_l__self___features___5_____5___norm2))
      getattr_getattr_l__self___features___5_____5___norm2 = None
      getattr_getattr_l__self___features___5_____5___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_1(
          getattr_getattr_l__self___features___5_____5___mlp_0))
      getattr_getattr_l__self___features___5_____5___mlp_0 = None
      getattr_getattr_l__self___features___5_____5___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_2(
          getattr_getattr_l__self___features___5_____5___mlp_1))
      getattr_getattr_l__self___features___5_____5___mlp_1 = None
      getattr_getattr_l__self___features___5_____5___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_3(
          getattr_getattr_l__self___features___5_____5___mlp_2))
      getattr_getattr_l__self___features___5_____5___mlp_2 = None
      getattr_getattr_l__self___features___5_____5___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_4(
          getattr_getattr_l__self___features___5_____5___mlp_3))
      getattr_getattr_l__self___features___5_____5___mlp_3 = None
      _log_api_usage_once_19 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3168 = []
      __temp_3168.extend((1, 1, 1, 1))
      noise_34 = torch.empty(__temp_3168, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_35 = noise_34.bernoulli_(0.8363636363636364)
      noise_34 = None
      div__17 = noise_35.div_(0.8363636363636364)
      mul_27 = getattr_getattr_l__self___features___5_____5___mlp_4 * noise_35
      getattr_getattr_l__self___features___5_____5___mlp_4 = None
      noise_35 = None
      x_129 = x_128 + mul_27
      x_128 = None
      mul_27 = None
      x_130 = torch.nn.functional.pad(x_129, (0, 0, 0, 0, 0, 0))
      x_129 = None
      x0_2 = x_130[Ellipsis, slice(0, None, 2), slice(0, None, 2), slice(None,
          None, None)]
      x1_2 = x_130[Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None,
          None, None)]
      x2_2 = x_130[Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None,
          None, None)]
      x3_2 = x_130[Ellipsis, slice(1, None, 2), slice(1, None, 2), slice(None,
          None, None)]
      x_130 = None
      x_132 = torch.cat([x0_2, x1_2, x2_2, x3_2], -1)
      x0_2 = None
      x1_2 = None
      x2_2 = None
      x3_2 = None
      x_133 = self.getattr_L__self___features___6___norm(x_132)
      x_132 = None
      x_134 = self.getattr_L__self___features___6___reduction(x_133)
      x_133 = None
      getattr_getattr_l__self___features___7_____0___norm1 = (self.
          getattr_getattr_L__self___features___7_____0___norm1(x_134))
      (
          getattr_getattr_l__self___features___7_____0___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___7_____0___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___7_____0___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___7_____0___attn_relative_position_index
          )
      relative_position_bias_40 = (
          getattr_getattr_l__self___features___7_____0___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___7_____0___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___7_____0___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___7_____0___attn_relative_position_index
          ) = None
      relative_position_bias_41 = relative_position_bias_40.view(49, 49, -1)
      relative_position_bias_40 = None
      permute_46 = relative_position_bias_41.permute(2, 0, 1)
      relative_position_bias_41 = None
      contiguous_20 = permute_46.contiguous()
      permute_46 = None
      relative_position_bias_43 = contiguous_20.unsqueeze(0)
      contiguous_20 = None
      getattr_getattr_l__self___features___7_____0___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___7_____0___attn_qkv_weight)
      getattr_getattr_l__self___features___7_____0___attn_proj_weight = (self.
          getattr_getattr_L__self___features___7_____0___attn_proj_weight)
      getattr_getattr_l__self___features___7_____0___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___7_____0___attn_qkv_bias)
      getattr_getattr_l__self___features___7_____0___attn_proj_bias = (self.
          getattr_getattr_L__self___features___7_____0___attn_proj_bias)
      x_135 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___7_____0___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___7_____0___norm1 = None
      x_136 = x_135.view(1, 1, 7, 1, 7, 768)
      x_135 = None
      permute_47 = x_136.permute(0, 1, 3, 2, 4, 5)
      x_136 = None
      x_137 = permute_47.reshape(1, 49, 768)
      permute_47 = None
      qkv_20 = torch._C._nn.linear(x_137,
          getattr_getattr_l__self___features___7_____0___attn_qkv_weight,
          getattr_getattr_l__self___features___7_____0___attn_qkv_bias)
      x_137 = None
      getattr_getattr_l__self___features___7_____0___attn_qkv_weight = None
      getattr_getattr_l__self___features___7_____0___attn_qkv_bias = None
      reshape_46 = qkv_20.reshape(1, 49, 3, 24, 32)
      qkv_20 = None
      qkv_21 = reshape_46.permute(2, 0, 3, 1, 4)
      reshape_46 = None
      q_20 = qkv_21[0]
      k_10 = qkv_21[1]
      v_10 = qkv_21[2]
      qkv_21 = None
      q_21 = q_20 * 0.1767766952966369
      q_20 = None
      transpose_20 = k_10.transpose(-2, -1)
      k_10 = None
      attn_55 = q_21.matmul(transpose_20)
      q_21 = None
      transpose_20 = None
      attn_56 = attn_55 + relative_position_bias_43
      attn_55 = None
      relative_position_bias_43 = None
      attn_57 = torch.nn.functional.softmax(attn_56, dim=-1)
      attn_56 = None
      attn_58 = torch.nn.functional.dropout(attn_57, p=0.0, training=True)
      attn_57 = None
      matmul_21 = attn_58.matmul(v_10)
      attn_58 = None
      v_10 = None
      transpose_21 = matmul_21.transpose(1, 2)
      matmul_21 = None
      x_138 = transpose_21.reshape(1, 49, 768)
      transpose_21 = None
      x_139 = torch._C._nn.linear(x_138,
          getattr_getattr_l__self___features___7_____0___attn_proj_weight,
          getattr_getattr_l__self___features___7_____0___attn_proj_bias)
      x_138 = None
      getattr_getattr_l__self___features___7_____0___attn_proj_weight = None
      getattr_getattr_l__self___features___7_____0___attn_proj_bias = None
      x_140 = torch.nn.functional.dropout(x_139, p=0.0, training=True)
      x_139 = None
      x_141 = x_140.view(1, 1, 1, 7, 7, 768)
      x_140 = None
      permute_49 = x_141.permute(0, 1, 3, 2, 4, 5)
      x_141 = None
      x_142 = permute_49.reshape(1, 7, 7, 768)
      permute_49 = None
      getitem_66 = x_142[slice(None, None, None), slice(None, 7, None), slice(
          None, 7, None), slice(None, None, None)]
      x_142 = None
      x_143 = getitem_66.contiguous()
      getitem_66 = None
      _log_api_usage_once_20 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3221 = []
      __temp_3221.extend((1, 1, 1, 1))
      noise_36 = torch.empty(__temp_3221, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_37 = noise_36.bernoulli_(0.8181818181818181)
      noise_36 = None
      div__18 = noise_37.div_(0.8181818181818181)
      mul_29 = x_143 * noise_37
      x_143 = None
      noise_37 = None
      x_144 = x_134 + mul_29
      x_134 = None
      mul_29 = None
      getattr_getattr_l__self___features___7_____0___norm2 = (self.
          getattr_getattr_L__self___features___7_____0___norm2(x_144))
      getattr_getattr_l__self___features___7_____0___mlp_0 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_0(
          getattr_getattr_l__self___features___7_____0___norm2))
      getattr_getattr_l__self___features___7_____0___norm2 = None
      getattr_getattr_l__self___features___7_____0___mlp_1 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_1(
          getattr_getattr_l__self___features___7_____0___mlp_0))
      getattr_getattr_l__self___features___7_____0___mlp_0 = None
      getattr_getattr_l__self___features___7_____0___mlp_2 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_2(
          getattr_getattr_l__self___features___7_____0___mlp_1))
      getattr_getattr_l__self___features___7_____0___mlp_1 = None
      getattr_getattr_l__self___features___7_____0___mlp_3 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_3(
          getattr_getattr_l__self___features___7_____0___mlp_2))
      getattr_getattr_l__self___features___7_____0___mlp_2 = None
      getattr_getattr_l__self___features___7_____0___mlp_4 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_4(
          getattr_getattr_l__self___features___7_____0___mlp_3))
      getattr_getattr_l__self___features___7_____0___mlp_3 = None
      _log_api_usage_once_21 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3234 = []
      __temp_3234.extend((1, 1, 1, 1))
      noise_38 = torch.empty(__temp_3234, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_39 = noise_38.bernoulli_(0.8181818181818181)
      noise_38 = None
      div__19 = noise_39.div_(0.8181818181818181)
      mul_30 = getattr_getattr_l__self___features___7_____0___mlp_4 * noise_39
      getattr_getattr_l__self___features___7_____0___mlp_4 = None
      noise_39 = None
      x_145 = x_144 + mul_30
      x_144 = None
      mul_30 = None
      getattr_getattr_l__self___features___7_____1___norm1 = (self.
          getattr_getattr_L__self___features___7_____1___norm1(x_145))
      (
          getattr_getattr_l__self___features___7_____1___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___7_____1___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___7_____1___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___7_____1___attn_relative_position_index
          )
      relative_position_bias_44 = (
          getattr_getattr_l__self___features___7_____1___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___7_____1___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___7_____1___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___7_____1___attn_relative_position_index
          ) = None
      relative_position_bias_45 = relative_position_bias_44.view(49, 49, -1)
      relative_position_bias_44 = None
      permute_50 = relative_position_bias_45.permute(2, 0, 1)
      relative_position_bias_45 = None
      contiguous_22 = permute_50.contiguous()
      permute_50 = None
      relative_position_bias_47 = contiguous_22.unsqueeze(0)
      contiguous_22 = None
      getattr_getattr_l__self___features___7_____1___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___7_____1___attn_qkv_weight)
      getattr_getattr_l__self___features___7_____1___attn_proj_weight = (self.
          getattr_getattr_L__self___features___7_____1___attn_proj_weight)
      getattr_getattr_l__self___features___7_____1___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___7_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___7_____1___attn_proj_bias = (self.
          getattr_getattr_L__self___features___7_____1___attn_proj_bias)
      x_146 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___7_____1___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___7_____1___norm1 = None
      x_147 = x_146.view(1, 1, 7, 1, 7, 768)
      x_146 = None
      permute_51 = x_147.permute(0, 1, 3, 2, 4, 5)
      x_147 = None
      x_148 = permute_51.reshape(1, 49, 768)
      permute_51 = None
      qkv_22 = torch._C._nn.linear(x_148,
          getattr_getattr_l__self___features___7_____1___attn_qkv_weight,
          getattr_getattr_l__self___features___7_____1___attn_qkv_bias)
      x_148 = None
      getattr_getattr_l__self___features___7_____1___attn_qkv_weight = None
      getattr_getattr_l__self___features___7_____1___attn_qkv_bias = None
      reshape_50 = qkv_22.reshape(1, 49, 3, 24, 32)
      qkv_22 = None
      qkv_23 = reshape_50.permute(2, 0, 3, 1, 4)
      reshape_50 = None
      q_22 = qkv_23[0]
      k_11 = qkv_23[1]
      v_11 = qkv_23[2]
      qkv_23 = None
      q_23 = q_22 * 0.1767766952966369
      q_22 = None
      transpose_22 = k_11.transpose(-2, -1)
      k_11 = None
      attn_59 = q_23.matmul(transpose_22)
      q_23 = None
      transpose_22 = None
      attn_60 = attn_59 + relative_position_bias_47
      attn_59 = None
      relative_position_bias_47 = None
      attn_61 = torch.nn.functional.softmax(attn_60, dim=-1)
      attn_60 = None
      attn_62 = torch.nn.functional.dropout(attn_61, p=0.0, training=True)
      attn_61 = None
      matmul_23 = attn_62.matmul(v_11)
      attn_62 = None
      v_11 = None
      transpose_23 = matmul_23.transpose(1, 2)
      matmul_23 = None
      x_149 = transpose_23.reshape(1, 49, 768)
      transpose_23 = None
      x_150 = torch._C._nn.linear(x_149,
          getattr_getattr_l__self___features___7_____1___attn_proj_weight,
          getattr_getattr_l__self___features___7_____1___attn_proj_bias)
      x_149 = None
      getattr_getattr_l__self___features___7_____1___attn_proj_weight = None
      getattr_getattr_l__self___features___7_____1___attn_proj_bias = None
      x_151 = torch.nn.functional.dropout(x_150, p=0.0, training=True)
      x_150 = None
      x_152 = x_151.view(1, 1, 1, 7, 7, 768)
      x_151 = None
      permute_53 = x_152.permute(0, 1, 3, 2, 4, 5)
      x_152 = None
      x_153 = permute_53.reshape(1, 7, 7, 768)
      permute_53 = None
      getitem_71 = x_153[slice(None, None, None), slice(None, 7, None), slice(
          None, 7, None), slice(None, None, None)]
      x_153 = None
      x_154 = getitem_71.contiguous()
      getitem_71 = None
      _log_api_usage_once_22 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3270 = []
      __temp_3270.extend((1, 1, 1, 1))
      noise_40 = torch.empty(__temp_3270, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_41 = noise_40.bernoulli_(0.8)
      noise_40 = None
      div__20 = noise_41.div_(0.8)
      mul_32 = x_154 * noise_41
      x_154 = None
      noise_41 = None
      x_155 = x_145 + mul_32
      x_145 = None
      mul_32 = None
      getattr_getattr_l__self___features___7_____1___norm2 = (self.
          getattr_getattr_L__self___features___7_____1___norm2(x_155))
      getattr_getattr_l__self___features___7_____1___mlp_0 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_0(
          getattr_getattr_l__self___features___7_____1___norm2))
      getattr_getattr_l__self___features___7_____1___norm2 = None
      getattr_getattr_l__self___features___7_____1___mlp_1 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_1(
          getattr_getattr_l__self___features___7_____1___mlp_0))
      getattr_getattr_l__self___features___7_____1___mlp_0 = None
      getattr_getattr_l__self___features___7_____1___mlp_2 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_2(
          getattr_getattr_l__self___features___7_____1___mlp_1))
      getattr_getattr_l__self___features___7_____1___mlp_1 = None
      getattr_getattr_l__self___features___7_____1___mlp_3 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_3(
          getattr_getattr_l__self___features___7_____1___mlp_2))
      getattr_getattr_l__self___features___7_____1___mlp_2 = None
      getattr_getattr_l__self___features___7_____1___mlp_4 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_4(
          getattr_getattr_l__self___features___7_____1___mlp_3))
      getattr_getattr_l__self___features___7_____1___mlp_3 = None
      _log_api_usage_once_23 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_3283 = []
      __temp_3283.extend((1, 1, 1, 1))
      noise_42 = torch.empty(__temp_3283, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_43 = noise_42.bernoulli_(0.8)
      noise_42 = None
      div__21 = noise_43.div_(0.8)
      mul_33 = getattr_getattr_l__self___features___7_____1___mlp_4 * noise_43
      getattr_getattr_l__self___features___7_____1___mlp_4 = None
      noise_43 = None
      x_157 = x_155 + mul_33
      x_155 = None
      mul_33 = None
      x_158 = self.L__self___norm(x_157)
      x_157 = None
      __temp_3290 = []
      __temp_3290.extend((0, 3, 1, 2))
      x_159 = torch.permute(x_158, __temp_3290)
      x_158 = None
      x_160 = self.L__self___avgpool(x_159)
      x_159 = None
      x_161 = self.L__self___flatten(x_160)
      x_160 = None
      x_162 = self.L__self___head(x_161)
      x_161 = None
      return x_162,

  ```
</details>
<details>
  <summary>forward</summary>

  ```python
  def forward(self, x):
      """Feed ``x`` through the model's stages in order and return the result.

      The stages are looked up on ``self`` and applied one after another:
      ``features`` -> ``norm`` -> ``permute`` -> ``avgpool`` -> ``flatten``
      -> ``head``. Each stage receives the output of the previous one.
      """
      stage_names = ("features", "norm", "permute", "avgpool", "flatten", "head")
      out = x
      for stage_name in stage_names:
          # Resolve the attribute right before calling it, so lookups and
          # calls interleave exactly as in a hand-written chain of statements.
          out = getattr(self, stage_name)(out)
      return out

  ```
</details>
<details>
  <summary>compiled_code_9</summary>

  ```python
  def compiled_code_9(self, x):
      """Call ``__compiled_fn_7`` on ``x`` and return element 0 of its result.

      ``self`` is accepted as the first parameter but is not used in the body.
      """
      graph_outputs = __compiled_fn_7(x)
      first_output = graph_outputs[0]
      return first_output

  ```
</details>
<details>
  <summary>compiled_code_10</summary>

  ```python
  def compiled_code_10(self, x):
      """Call ``__compiled_fn_6`` on ``x`` and return element 0 of its result.

      ``self`` is accepted as the first parameter but is not used in the body.
      """
      graph_outputs = __compiled_fn_6(x)
      first_output = graph_outputs[0]
      return first_output

  ```
</details>
<details>
  <summary>__compiled_fn_7</summary>

  ```python
  def __compiled_fn_7(self, L_x_):
      l_x_ = L_x_
      l__self___features_0_0 = self.L__self___features_0_0(l_x_)
      l_x_ = None
      __temp_1698 = []
      __temp_1698.extend((0, 2, 3, 1))
      permute = torch.permute(l__self___features_0_0, __temp_1698)
      l__self___features_0_0 = None
      l__self___features_0_2 = self.L__self___features_0_2(permute)
      permute = None
      getattr_getattr_l__self___features___1_____0___norm1 = (self.
          getattr_getattr_L__self___features___1_____0___norm1(
          l__self___features_0_2))
      (
          getattr_getattr_l__self___features___1_____0___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___1_____0___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___1_____0___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___1_____0___attn_relative_position_index
          )
      relative_position_bias = (
          getattr_getattr_l__self___features___1_____0___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___1_____0___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___1_____0___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___1_____0___attn_relative_position_index
          ) = None
      relative_position_bias_1 = relative_position_bias.view(49, 49, -1)
      relative_position_bias = None
      permute_1 = relative_position_bias_1.permute(2, 0, 1)
      relative_position_bias_1 = None
      contiguous = permute_1.contiguous()
      permute_1 = None
      relative_position_bias_3 = contiguous.unsqueeze(0)
      contiguous = None
      getattr_getattr_l__self___features___1_____0___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___1_____0___attn_qkv_weight)
      getattr_getattr_l__self___features___1_____0___attn_proj_weight = (self.
          getattr_getattr_L__self___features___1_____0___attn_proj_weight)
      getattr_getattr_l__self___features___1_____0___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___1_____0___attn_qkv_bias)
      getattr_getattr_l__self___features___1_____0___attn_proj_bias = (self.
          getattr_getattr_L__self___features___1_____0___attn_proj_bias)
      x = torch.nn.functional.pad(
          getattr_getattr_l__self___features___1_____0___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___1_____0___norm1 = None
      x_1 = x.view(1, 8, 7, 8, 7, 96)
      x = None
      permute_2 = x_1.permute(0, 1, 3, 2, 4, 5)
      x_1 = None
      x_2 = permute_2.reshape(64, 49, 96)
      permute_2 = None
      qkv = torch._C._nn.linear(x_2,
          getattr_getattr_l__self___features___1_____0___attn_qkv_weight,
          getattr_getattr_l__self___features___1_____0___attn_qkv_bias)
      x_2 = None
      getattr_getattr_l__self___features___1_____0___attn_qkv_weight = None
      getattr_getattr_l__self___features___1_____0___attn_qkv_bias = None
      reshape_1 = qkv.reshape(64, 49, 3, 3, 32)
      qkv = None
      qkv_1 = reshape_1.permute(2, 0, 3, 1, 4)
      reshape_1 = None
      q = qkv_1[0]
      k = qkv_1[1]
      v = qkv_1[2]
      qkv_1 = None
      q_1 = q * 0.1767766952966369
      q = None
      transpose = k.transpose(-2, -1)
      k = None
      attn = q_1.matmul(transpose)
      q_1 = None
      transpose = None
      attn_1 = attn + relative_position_bias_3
      attn = None
      relative_position_bias_3 = None
      attn_2 = torch.nn.functional.softmax(attn_1, dim=-1)
      attn_1 = None
      attn_3 = torch.nn.functional.dropout(attn_2, p=0.0, training=True)
      attn_2 = None
      matmul_1 = attn_3.matmul(v)
      attn_3 = None
      v = None
      transpose_1 = matmul_1.transpose(1, 2)
      matmul_1 = None
      x_3 = transpose_1.reshape(64, 49, 96)
      transpose_1 = None
      x_4 = torch._C._nn.linear(x_3,
          getattr_getattr_l__self___features___1_____0___attn_proj_weight,
          getattr_getattr_l__self___features___1_____0___attn_proj_bias)
      x_3 = None
      getattr_getattr_l__self___features___1_____0___attn_proj_weight = None
      getattr_getattr_l__self___features___1_____0___attn_proj_bias = None
      x_5 = torch.nn.functional.dropout(x_4, p=0.0, training=True)
      x_4 = None
      x_6 = x_5.view(1, 8, 8, 7, 7, 96)
      x_5 = None
      permute_4 = x_6.permute(0, 1, 3, 2, 4, 5)
      x_6 = None
      x_7 = permute_4.reshape(1, 56, 56, 96)
      permute_4 = None
      getitem_4 = x_7[slice(None, None, None), slice(None, 56, None), slice(None,
          56, None), slice(None, None, None)]
      x_7 = None
      x_8 = getitem_4.contiguous()
      getitem_4 = None
      _log_api_usage_once = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      x_9 = l__self___features_0_2 + x_8
      l__self___features_0_2 = None
      x_8 = None
      getattr_getattr_l__self___features___1_____0___norm2 = (self.
          getattr_getattr_L__self___features___1_____0___norm2(x_9))
      getattr_getattr_l__self___features___1_____0___mlp_0 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_0(
          getattr_getattr_l__self___features___1_____0___norm2))
      getattr_getattr_l__self___features___1_____0___norm2 = None
      getattr_getattr_l__self___features___1_____0___mlp_1 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_1(
          getattr_getattr_l__self___features___1_____0___mlp_0))
      getattr_getattr_l__self___features___1_____0___mlp_0 = None
      getattr_getattr_l__self___features___1_____0___mlp_2 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_2(
          getattr_getattr_l__self___features___1_____0___mlp_1))
      getattr_getattr_l__self___features___1_____0___mlp_1 = None
      getattr_getattr_l__self___features___1_____0___mlp_3 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_3(
          getattr_getattr_l__self___features___1_____0___mlp_2))
      getattr_getattr_l__self___features___1_____0___mlp_2 = None
      getattr_getattr_l__self___features___1_____0___mlp_4 = (self.
          getattr_getattr_L__self___features___1_____0___mlp_4(
          getattr_getattr_l__self___features___1_____0___mlp_3))
      getattr_getattr_l__self___features___1_____0___mlp_3 = None
      _log_api_usage_once_1 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      x_10 = x_9 + getattr_getattr_l__self___features___1_____0___mlp_4
      x_9 = None
      getattr_getattr_l__self___features___1_____0___mlp_4 = None
      getattr_getattr_l__self___features___1_____1___norm1 = (self.
          getattr_getattr_L__self___features___1_____1___norm1(x_10))
      (
          getattr_getattr_l__self___features___1_____1___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___1_____1___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___1_____1___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___1_____1___attn_relative_position_index
          )
      relative_position_bias_4 = (
          getattr_getattr_l__self___features___1_____1___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___1_____1___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___1_____1___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___1_____1___attn_relative_position_index
          ) = None
      relative_position_bias_5 = relative_position_bias_4.view(49, 49, -1)
      relative_position_bias_4 = None
      permute_5 = relative_position_bias_5.permute(2, 0, 1)
      relative_position_bias_5 = None
      contiguous_2 = permute_5.contiguous()
      permute_5 = None
      relative_position_bias_7 = contiguous_2.unsqueeze(0)
      contiguous_2 = None
      getattr_getattr_l__self___features___1_____1___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___1_____1___attn_qkv_weight)
      getattr_getattr_l__self___features___1_____1___attn_proj_weight = (self.
          getattr_getattr_L__self___features___1_____1___attn_proj_weight)
      getattr_getattr_l__self___features___1_____1___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___1_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___1_____1___attn_proj_bias = (self.
          getattr_getattr_L__self___features___1_____1___attn_proj_bias)
      x_11 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___1_____1___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___1_____1___norm1 = None
      x_12 = torch.roll(x_11, shifts=(-3, -3), dims=(1, 2))
      x_11 = None
      x_13 = x_12.view(1, 8, 7, 8, 7, 96)
      x_12 = None
      permute_6 = x_13.permute(0, 1, 3, 2, 4, 5)
      x_13 = None
      x_14 = permute_6.reshape(64, 49, 96)
      permute_6 = None
      qkv_2 = torch._C._nn.linear(x_14,
          getattr_getattr_l__self___features___1_____1___attn_qkv_weight,
          getattr_getattr_l__self___features___1_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___1_____1___attn_qkv_weight = None
      getattr_getattr_l__self___features___1_____1___attn_qkv_bias = None
      reshape_5 = qkv_2.reshape(64, 49, 3, 3, 32)
      qkv_2 = None
      qkv_3 = reshape_5.permute(2, 0, 3, 1, 4)
      reshape_5 = None
      q_2 = qkv_3[0]
      k_1 = qkv_3[1]
      v_1 = qkv_3[2]
      qkv_3 = None
      q_3 = q_2 * 0.1767766952966369
      q_2 = None
      transpose_2 = k_1.transpose(-2, -1)
      k_1 = None
      attn_4 = q_3.matmul(transpose_2)
      q_3 = None
      transpose_2 = None
      attn_5 = attn_4 + relative_position_bias_7
      attn_4 = None
      relative_position_bias_7 = None
      attn_mask = x_14.new_zeros((56, 56))
      x_14 = None
      attn_mask[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem = attn_mask
      attn_mask[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_1 = attn_mask
      attn_mask[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_2 = attn_mask
      attn_mask[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_3 = attn_mask
      attn_mask[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_4 = attn_mask
      attn_mask[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_5 = attn_mask
      attn_mask[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_6 = attn_mask
      attn_mask[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_7 = attn_mask
      attn_mask[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_8 = attn_mask
      attn_mask_1 = attn_mask.view(8, 7, 8, 7)
      attn_mask = None
      permute_8 = attn_mask_1.permute(0, 2, 1, 3)
      attn_mask_1 = None
      attn_mask_2 = permute_8.reshape(64, 49)
      permute_8 = None
      unsqueeze_2 = attn_mask_2.unsqueeze(1)
      unsqueeze_3 = attn_mask_2.unsqueeze(2)
      attn_mask_2 = None
      attn_mask_3 = unsqueeze_2 - unsqueeze_3
      unsqueeze_2 = None
      unsqueeze_3 = None
      ne = attn_mask_3 != 0
      masked_fill = attn_mask_3.masked_fill(ne, -100.0)
      ne = None
      eq = attn_mask_3 == 0
      attn_mask_3 = None
      attn_mask_4 = masked_fill.masked_fill(eq, 0.0)
      masked_fill = None
      eq = None
      attn_6 = attn_5.view(1, 64, 3, 49, 49)
      attn_5 = None
      unsqueeze_4 = attn_mask_4.unsqueeze(1)
      attn_mask_4 = None
      unsqueeze_5 = unsqueeze_4.unsqueeze(0)
      unsqueeze_4 = None
      attn_7 = attn_6 + unsqueeze_5
      attn_6 = None
      unsqueeze_5 = None
      attn_8 = attn_7.view(-1, 3, 49, 49)
      attn_7 = None
      attn_9 = torch.nn.functional.softmax(attn_8, dim=-1)
      attn_8 = None
      attn_10 = torch.nn.functional.dropout(attn_9, p=0.0, training=True)
      attn_9 = None
      matmul_3 = attn_10.matmul(v_1)
      attn_10 = None
      v_1 = None
      transpose_3 = matmul_3.transpose(1, 2)
      matmul_3 = None
      x_15 = transpose_3.reshape(64, 49, 96)
      transpose_3 = None
      x_16 = torch._C._nn.linear(x_15,
          getattr_getattr_l__self___features___1_____1___attn_proj_weight,
          getattr_getattr_l__self___features___1_____1___attn_proj_bias)
      x_15 = None
      getattr_getattr_l__self___features___1_____1___attn_proj_weight = None
      getattr_getattr_l__self___features___1_____1___attn_proj_bias = None
      x_17 = torch.nn.functional.dropout(x_16, p=0.0, training=True)
      x_16 = None
      x_18 = x_17.view(1, 8, 8, 7, 7, 96)
      x_17 = None
      permute_9 = x_18.permute(0, 1, 3, 2, 4, 5)
      x_18 = None
      x_19 = permute_9.reshape(1, 56, 56, 96)
      permute_9 = None
      x_20 = torch.roll(x_19, shifts=(3, 3), dims=(1, 2))
      x_19 = None
      getitem_9 = x_20[slice(None, None, None), slice(None, 56, None), slice(None,
          56, None), slice(None, None, None)]
      x_20 = None
      x_21 = getitem_9.contiguous()
      getitem_9 = None
      _log_api_usage_once_2 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_1801 = []
      __temp_1801.extend((1, 1, 1, 1))
      noise = torch.empty(__temp_1801, dtype=torch.float32, device=device(type='cpu')
          )
      noise_1 = noise.bernoulli_(0.9818181818181818)
      noise = None
      div_ = noise_1.div_(0.9818181818181818)
      mul_2 = x_21 * noise_1
      x_21 = None
      noise_1 = None
      x_22 = x_10 + mul_2
      x_10 = None
      mul_2 = None
      getattr_getattr_l__self___features___1_____1___norm2 = (self.
          getattr_getattr_L__self___features___1_____1___norm2(x_22))
      getattr_getattr_l__self___features___1_____1___mlp_0 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_0(
          getattr_getattr_l__self___features___1_____1___norm2))
      getattr_getattr_l__self___features___1_____1___norm2 = None
      getattr_getattr_l__self___features___1_____1___mlp_1 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_1(
          getattr_getattr_l__self___features___1_____1___mlp_0))
      getattr_getattr_l__self___features___1_____1___mlp_0 = None
      getattr_getattr_l__self___features___1_____1___mlp_2 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_2(
          getattr_getattr_l__self___features___1_____1___mlp_1))
      getattr_getattr_l__self___features___1_____1___mlp_1 = None
      getattr_getattr_l__self___features___1_____1___mlp_3 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_3(
          getattr_getattr_l__self___features___1_____1___mlp_2))
      getattr_getattr_l__self___features___1_____1___mlp_2 = None
      getattr_getattr_l__self___features___1_____1___mlp_4 = (self.
          getattr_getattr_L__self___features___1_____1___mlp_4(
          getattr_getattr_l__self___features___1_____1___mlp_3))
      getattr_getattr_l__self___features___1_____1___mlp_3 = None
      _log_api_usage_once_3 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_1814 = []
      __temp_1814.extend((1, 1, 1, 1))
      noise_2 = torch.empty(__temp_1814, dtype=torch.float32, device=device(type=
          'cpu'))
      noise_3 = noise_2.bernoulli_(0.9818181818181818)
      noise_2 = None
      div__1 = noise_3.div_(0.9818181818181818)
      mul_3 = getattr_getattr_l__self___features___1_____1___mlp_4 * noise_3
      getattr_getattr_l__self___features___1_____1___mlp_4 = None
      noise_3 = None
      x_23 = x_22 + mul_3
      x_22 = None
      mul_3 = None
      x_24 = torch.nn.functional.pad(x_23, (0, 0, 0, 0, 0, 0))
      x_23 = None
      x0 = x_24[Ellipsis, slice(0, None, 2), slice(0, None, 2), slice(None, None,
          None)]
      x1 = x_24[Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None, None,
          None)]
      x2 = x_24[Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None, None,
          None)]
      x3 = x_24[Ellipsis, slice(1, None, 2), slice(1, None, 2), slice(None, None,
          None)]
      x_24 = None
      x_26 = torch.cat([x0, x1, x2, x3], -1)
      x0 = None
      x1 = None
      x2 = None
      x3 = None
      x_27 = self.getattr_L__self___features___2___norm(x_26)
      x_26 = None
      x_28 = self.getattr_L__self___features___2___reduction(x_27)
      x_27 = None
      getattr_getattr_l__self___features___3_____0___norm1 = (self.
          getattr_getattr_L__self___features___3_____0___norm1(x_28))
      (
          getattr_getattr_l__self___features___3_____0___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___3_____0___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___3_____0___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___3_____0___attn_relative_position_index
          )
      relative_position_bias_8 = (
          getattr_getattr_l__self___features___3_____0___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___3_____0___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___3_____0___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___3_____0___attn_relative_position_index
          ) = None
      relative_position_bias_9 = relative_position_bias_8.view(49, 49, -1)
      relative_position_bias_8 = None
      permute_10 = relative_position_bias_9.permute(2, 0, 1)
      relative_position_bias_9 = None
      contiguous_4 = permute_10.contiguous()
      permute_10 = None
      relative_position_bias_11 = contiguous_4.unsqueeze(0)
      contiguous_4 = None
      getattr_getattr_l__self___features___3_____0___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___3_____0___attn_qkv_weight)
      getattr_getattr_l__self___features___3_____0___attn_proj_weight = (self.
          getattr_getattr_L__self___features___3_____0___attn_proj_weight)
      getattr_getattr_l__self___features___3_____0___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___3_____0___attn_qkv_bias)
      getattr_getattr_l__self___features___3_____0___attn_proj_bias = (self.
          getattr_getattr_L__self___features___3_____0___attn_proj_bias)
      x_29 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___3_____0___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___3_____0___norm1 = None
      x_30 = x_29.view(1, 4, 7, 4, 7, 192)
      x_29 = None
      permute_11 = x_30.permute(0, 1, 3, 2, 4, 5)
      x_30 = None
      x_31 = permute_11.reshape(16, 49, 192)
      permute_11 = None
      qkv_4 = torch._C._nn.linear(x_31,
          getattr_getattr_l__self___features___3_____0___attn_qkv_weight,
          getattr_getattr_l__self___features___3_____0___attn_qkv_bias)
      x_31 = None
      getattr_getattr_l__self___features___3_____0___attn_qkv_weight = None
      getattr_getattr_l__self___features___3_____0___attn_qkv_bias = None
      reshape_10 = qkv_4.reshape(16, 49, 3, 6, 32)
      qkv_4 = None
      qkv_5 = reshape_10.permute(2, 0, 3, 1, 4)
      reshape_10 = None
      q_4 = qkv_5[0]
      k_2 = qkv_5[1]
      v_2 = qkv_5[2]
      qkv_5 = None
      q_5 = q_4 * 0.1767766952966369
      q_4 = None
      transpose_4 = k_2.transpose(-2, -1)
      k_2 = None
      attn_11 = q_5.matmul(transpose_4)
      q_5 = None
      transpose_4 = None
      attn_12 = attn_11 + relative_position_bias_11
      attn_11 = None
      relative_position_bias_11 = None
      attn_13 = torch.nn.functional.softmax(attn_12, dim=-1)
      attn_12 = None
      attn_14 = torch.nn.functional.dropout(attn_13, p=0.0, training=True)
      attn_13 = None
      matmul_5 = attn_14.matmul(v_2)
      attn_14 = None
      v_2 = None
      transpose_5 = matmul_5.transpose(1, 2)
      matmul_5 = None
      x_32 = transpose_5.reshape(16, 49, 192)
      transpose_5 = None
      x_33 = torch._C._nn.linear(x_32,
          getattr_getattr_l__self___features___3_____0___attn_proj_weight,
          getattr_getattr_l__self___features___3_____0___attn_proj_bias)
      x_32 = None
      getattr_getattr_l__self___features___3_____0___attn_proj_weight = None
      getattr_getattr_l__self___features___3_____0___attn_proj_bias = None
      x_34 = torch.nn.functional.dropout(x_33, p=0.0, training=True)
      x_33 = None
      x_35 = x_34.view(1, 4, 4, 7, 7, 192)
      x_34 = None
      permute_13 = x_35.permute(0, 1, 3, 2, 4, 5)
      x_35 = None
      x_36 = permute_13.reshape(1, 28, 28, 192)
      permute_13 = None
      getitem_18 = x_36[slice(None, None, None), slice(None, 28, None), slice(
          None, 28, None), slice(None, None, None)]
      x_36 = None
      x_37 = getitem_18.contiguous()
      getitem_18 = None
      _log_api_usage_once_4 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_1867 = []
      __temp_1867.extend((1, 1, 1, 1))
      noise_4 = torch.empty(__temp_1867, dtype=torch.float32, device=device(type=
          'cpu'))
      noise_5 = noise_4.bernoulli_(0.9636363636363636)
      noise_4 = None
      div__2 = noise_5.div_(0.9636363636363636)
      mul_5 = x_37 * noise_5
      x_37 = None
      noise_5 = None
      x_38 = x_28 + mul_5
      x_28 = None
      mul_5 = None
      getattr_getattr_l__self___features___3_____0___norm2 = (self.
          getattr_getattr_L__self___features___3_____0___norm2(x_38))
      getattr_getattr_l__self___features___3_____0___mlp_0 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_0(
          getattr_getattr_l__self___features___3_____0___norm2))
      getattr_getattr_l__self___features___3_____0___norm2 = None
      getattr_getattr_l__self___features___3_____0___mlp_1 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_1(
          getattr_getattr_l__self___features___3_____0___mlp_0))
      getattr_getattr_l__self___features___3_____0___mlp_0 = None
      getattr_getattr_l__self___features___3_____0___mlp_2 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_2(
          getattr_getattr_l__self___features___3_____0___mlp_1))
      getattr_getattr_l__self___features___3_____0___mlp_1 = None
      getattr_getattr_l__self___features___3_____0___mlp_3 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_3(
          getattr_getattr_l__self___features___3_____0___mlp_2))
      getattr_getattr_l__self___features___3_____0___mlp_2 = None
      getattr_getattr_l__self___features___3_____0___mlp_4 = (self.
          getattr_getattr_L__self___features___3_____0___mlp_4(
          getattr_getattr_l__self___features___3_____0___mlp_3))
      getattr_getattr_l__self___features___3_____0___mlp_3 = None
      _log_api_usage_once_5 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_1880 = []
      __temp_1880.extend((1, 1, 1, 1))
      noise_6 = torch.empty(__temp_1880, dtype=torch.float32, device=device(type=
          'cpu'))
      noise_7 = noise_6.bernoulli_(0.9636363636363636)
      noise_6 = None
      div__3 = noise_7.div_(0.9636363636363636)
      mul_6 = getattr_getattr_l__self___features___3_____0___mlp_4 * noise_7
      getattr_getattr_l__self___features___3_____0___mlp_4 = None
      noise_7 = None
      x_39 = x_38 + mul_6
      x_38 = None
      mul_6 = None
      getattr_getattr_l__self___features___3_____1___norm1 = (self.
          getattr_getattr_L__self___features___3_____1___norm1(x_39))
      (
          getattr_getattr_l__self___features___3_____1___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___3_____1___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___3_____1___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___3_____1___attn_relative_position_index
          )
      relative_position_bias_12 = (
          getattr_getattr_l__self___features___3_____1___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___3_____1___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___3_____1___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___3_____1___attn_relative_position_index
          ) = None
      relative_position_bias_13 = relative_position_bias_12.view(49, 49, -1)
      relative_position_bias_12 = None
      permute_14 = relative_position_bias_13.permute(2, 0, 1)
      relative_position_bias_13 = None
      contiguous_6 = permute_14.contiguous()
      permute_14 = None
      relative_position_bias_15 = contiguous_6.unsqueeze(0)
      contiguous_6 = None
      getattr_getattr_l__self___features___3_____1___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___3_____1___attn_qkv_weight)
      getattr_getattr_l__self___features___3_____1___attn_proj_weight = (self.
          getattr_getattr_L__self___features___3_____1___attn_proj_weight)
      getattr_getattr_l__self___features___3_____1___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___3_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___3_____1___attn_proj_bias = (self.
          getattr_getattr_L__self___features___3_____1___attn_proj_bias)
      x_40 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___3_____1___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___3_____1___norm1 = None
      x_41 = torch.roll(x_40, shifts=(-3, -3), dims=(1, 2))
      x_40 = None
      x_42 = x_41.view(1, 4, 7, 4, 7, 192)
      x_41 = None
      permute_15 = x_42.permute(0, 1, 3, 2, 4, 5)
      x_42 = None
      x_43 = permute_15.reshape(16, 49, 192)
      permute_15 = None
      qkv_6 = torch._C._nn.linear(x_43,
          getattr_getattr_l__self___features___3_____1___attn_qkv_weight,
          getattr_getattr_l__self___features___3_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___3_____1___attn_qkv_weight = None
      getattr_getattr_l__self___features___3_____1___attn_qkv_bias = None
      reshape_14 = qkv_6.reshape(16, 49, 3, 6, 32)
      qkv_6 = None
      qkv_7 = reshape_14.permute(2, 0, 3, 1, 4)
      reshape_14 = None
      q_6 = qkv_7[0]
      k_3 = qkv_7[1]
      v_3 = qkv_7[2]
      qkv_7 = None
      q_7 = q_6 * 0.1767766952966369
      q_6 = None
      transpose_6 = k_3.transpose(-2, -1)
      k_3 = None
      attn_15 = q_7.matmul(transpose_6)
      q_7 = None
      transpose_6 = None
      attn_16 = attn_15 + relative_position_bias_15
      attn_15 = None
      relative_position_bias_15 = None
      attn_mask_5 = x_43.new_zeros((28, 28))
      x_43 = None
      attn_mask_5[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem_9 = attn_mask_5
      attn_mask_5[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_10 = attn_mask_5
      attn_mask_5[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_11 = attn_mask_5
      attn_mask_5[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_12 = attn_mask_5
      attn_mask_5[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_13 = attn_mask_5
      attn_mask_5[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_14 = attn_mask_5
      attn_mask_5[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_15 = attn_mask_5
      attn_mask_5[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_16 = attn_mask_5
      attn_mask_5[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_17 = attn_mask_5
      attn_mask_6 = attn_mask_5.view(4, 7, 4, 7)
      attn_mask_5 = None
      permute_17 = attn_mask_6.permute(0, 2, 1, 3)
      attn_mask_6 = None
      attn_mask_7 = permute_17.reshape(16, 49)
      permute_17 = None
      unsqueeze_8 = attn_mask_7.unsqueeze(1)
      unsqueeze_9 = attn_mask_7.unsqueeze(2)
      attn_mask_7 = None
      attn_mask_8 = unsqueeze_8 - unsqueeze_9
      unsqueeze_8 = None
      unsqueeze_9 = None
      ne_1 = attn_mask_8 != 0
      masked_fill_2 = attn_mask_8.masked_fill(ne_1, -100.0)
      ne_1 = None
      eq_1 = attn_mask_8 == 0
      attn_mask_8 = None
      attn_mask_9 = masked_fill_2.masked_fill(eq_1, 0.0)
      masked_fill_2 = None
      eq_1 = None
      attn_17 = attn_16.view(1, 16, 6, 49, 49)
      attn_16 = None
      unsqueeze_10 = attn_mask_9.unsqueeze(1)
      attn_mask_9 = None
      unsqueeze_11 = unsqueeze_10.unsqueeze(0)
      unsqueeze_10 = None
      attn_18 = attn_17 + unsqueeze_11
      attn_17 = None
      unsqueeze_11 = None
      attn_19 = attn_18.view(-1, 6, 49, 49)
      attn_18 = None
      attn_20 = torch.nn.functional.softmax(attn_19, dim=-1)
      attn_19 = None
      attn_21 = torch.nn.functional.dropout(attn_20, p=0.0, training=True)
      attn_20 = None
      matmul_7 = attn_21.matmul(v_3)
      attn_21 = None
      v_3 = None
      transpose_7 = matmul_7.transpose(1, 2)
      matmul_7 = None
      x_44 = transpose_7.reshape(16, 49, 192)
      transpose_7 = None
      x_45 = torch._C._nn.linear(x_44,
          getattr_getattr_l__self___features___3_____1___attn_proj_weight,
          getattr_getattr_l__self___features___3_____1___attn_proj_bias)
      x_44 = None
      getattr_getattr_l__self___features___3_____1___attn_proj_weight = None
      getattr_getattr_l__self___features___3_____1___attn_proj_bias = None
      x_46 = torch.nn.functional.dropout(x_45, p=0.0, training=True)
      x_45 = None
      x_47 = x_46.view(1, 4, 4, 7, 7, 192)
      x_46 = None
      permute_18 = x_47.permute(0, 1, 3, 2, 4, 5)
      x_47 = None
      x_48 = permute_18.reshape(1, 28, 28, 192)
      permute_18 = None
      x_49 = torch.roll(x_48, shifts=(3, 3), dims=(1, 2))
      x_48 = None
      getitem_23 = x_49[slice(None, None, None), slice(None, 28, None), slice(
          None, 28, None), slice(None, None, None)]
      x_49 = None
      x_50 = getitem_23.contiguous()
      getitem_23 = None
      _log_api_usage_once_6 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_1948 = []
      __temp_1948.extend((1, 1, 1, 1))
      noise_8 = torch.empty(__temp_1948, dtype=torch.float32, device=device(type=
          'cpu'))
      noise_9 = noise_8.bernoulli_(0.9454545454545454)
      noise_8 = None
      div__4 = noise_9.div_(0.9454545454545454)
      mul_8 = x_50 * noise_9
      x_50 = None
      noise_9 = None
      x_51 = x_39 + mul_8
      x_39 = None
      mul_8 = None
      getattr_getattr_l__self___features___3_____1___norm2 = (self.
          getattr_getattr_L__self___features___3_____1___norm2(x_51))
      getattr_getattr_l__self___features___3_____1___mlp_0 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_0(
          getattr_getattr_l__self___features___3_____1___norm2))
      getattr_getattr_l__self___features___3_____1___norm2 = None
      getattr_getattr_l__self___features___3_____1___mlp_1 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_1(
          getattr_getattr_l__self___features___3_____1___mlp_0))
      getattr_getattr_l__self___features___3_____1___mlp_0 = None
      getattr_getattr_l__self___features___3_____1___mlp_2 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_2(
          getattr_getattr_l__self___features___3_____1___mlp_1))
      getattr_getattr_l__self___features___3_____1___mlp_1 = None
      getattr_getattr_l__self___features___3_____1___mlp_3 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_3(
          getattr_getattr_l__self___features___3_____1___mlp_2))
      getattr_getattr_l__self___features___3_____1___mlp_2 = None
      getattr_getattr_l__self___features___3_____1___mlp_4 = (self.
          getattr_getattr_L__self___features___3_____1___mlp_4(
          getattr_getattr_l__self___features___3_____1___mlp_3))
      getattr_getattr_l__self___features___3_____1___mlp_3 = None
      _log_api_usage_once_7 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_1961 = []
      __temp_1961.extend((1, 1, 1, 1))
      noise_10 = torch.empty(__temp_1961, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_11 = noise_10.bernoulli_(0.9454545454545454)
      noise_10 = None
      div__5 = noise_11.div_(0.9454545454545454)
      mul_9 = getattr_getattr_l__self___features___3_____1___mlp_4 * noise_11
      getattr_getattr_l__self___features___3_____1___mlp_4 = None
      noise_11 = None
      x_52 = x_51 + mul_9
      x_51 = None
      mul_9 = None
      x_53 = torch.nn.functional.pad(x_52, (0, 0, 0, 0, 0, 0))
      x_52 = None
      x0_1 = x_53[Ellipsis, slice(0, None, 2), slice(0, None, 2), slice(None,
          None, None)]
      x1_1 = x_53[Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None,
          None, None)]
      x2_1 = x_53[Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None,
          None, None)]
      x3_1 = x_53[Ellipsis, slice(1, None, 2), slice(1, None, 2), slice(None,
          None, None)]
      x_53 = None
      x_55 = torch.cat([x0_1, x1_1, x2_1, x3_1], -1)
      x0_1 = None
      x1_1 = None
      x2_1 = None
      x3_1 = None
      x_56 = self.getattr_L__self___features___4___norm(x_55)
      x_55 = None
      x_57 = self.getattr_L__self___features___4___reduction(x_56)
      x_56 = None
      getattr_getattr_l__self___features___5_____0___norm1 = (self.
          getattr_getattr_L__self___features___5_____0___norm1(x_57))
      (
          getattr_getattr_l__self___features___5_____0___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____0___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____0___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____0___attn_relative_position_index
          )
      relative_position_bias_16 = (
          getattr_getattr_l__self___features___5_____0___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____0___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____0___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____0___attn_relative_position_index
          ) = None
      relative_position_bias_17 = relative_position_bias_16.view(49, 49, -1)
      relative_position_bias_16 = None
      permute_19 = relative_position_bias_17.permute(2, 0, 1)
      relative_position_bias_17 = None
      contiguous_8 = permute_19.contiguous()
      permute_19 = None
      relative_position_bias_19 = contiguous_8.unsqueeze(0)
      contiguous_8 = None
      getattr_getattr_l__self___features___5_____0___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____0___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____0___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____0___attn_proj_weight)
      getattr_getattr_l__self___features___5_____0___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____0___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____0___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____0___attn_proj_bias)
      x_58 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____0___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____0___norm1 = None
      x_59 = x_58.view(1, 2, 7, 2, 7, 384)
      x_58 = None
      permute_20 = x_59.permute(0, 1, 3, 2, 4, 5)
      x_59 = None
      x_60 = permute_20.reshape(4, 49, 384)
      permute_20 = None
      qkv_8 = torch._C._nn.linear(x_60,
          getattr_getattr_l__self___features___5_____0___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____0___attn_qkv_bias)
      x_60 = None
      getattr_getattr_l__self___features___5_____0___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____0___attn_qkv_bias = None
      reshape_19 = qkv_8.reshape(4, 49, 3, 12, 32)
      qkv_8 = None
      qkv_9 = reshape_19.permute(2, 0, 3, 1, 4)
      reshape_19 = None
      q_8 = qkv_9[0]
      k_4 = qkv_9[1]
      v_4 = qkv_9[2]
      qkv_9 = None
      q_9 = q_8 * 0.1767766952966369
      q_8 = None
      transpose_8 = k_4.transpose(-2, -1)
      k_4 = None
      attn_22 = q_9.matmul(transpose_8)
      q_9 = None
      transpose_8 = None
      attn_23 = attn_22 + relative_position_bias_19
      attn_22 = None
      relative_position_bias_19 = None
      attn_24 = torch.nn.functional.softmax(attn_23, dim=-1)
      attn_23 = None
      attn_25 = torch.nn.functional.dropout(attn_24, p=0.0, training=True)
      attn_24 = None
      matmul_9 = attn_25.matmul(v_4)
      attn_25 = None
      v_4 = None
      transpose_9 = matmul_9.transpose(1, 2)
      matmul_9 = None
      x_61 = transpose_9.reshape(4, 49, 384)
      transpose_9 = None
      x_62 = torch._C._nn.linear(x_61,
          getattr_getattr_l__self___features___5_____0___attn_proj_weight,
          getattr_getattr_l__self___features___5_____0___attn_proj_bias)
      x_61 = None
      getattr_getattr_l__self___features___5_____0___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____0___attn_proj_bias = None
      x_63 = torch.nn.functional.dropout(x_62, p=0.0, training=True)
      x_62 = None
      x_64 = x_63.view(1, 2, 2, 7, 7, 384)
      x_63 = None
      permute_22 = x_64.permute(0, 1, 3, 2, 4, 5)
      x_64 = None
      x_65 = permute_22.reshape(1, 14, 14, 384)
      permute_22 = None
      getitem_32 = x_65[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_65 = None
      x_66 = getitem_32.contiguous()
      getitem_32 = None
      _log_api_usage_once_8 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2014 = []
      __temp_2014.extend((1, 1, 1, 1))
      noise_12 = torch.empty(__temp_2014, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_13 = noise_12.bernoulli_(0.9272727272727272)
      noise_12 = None
      div__6 = noise_13.div_(0.9272727272727272)
      mul_11 = x_66 * noise_13
      x_66 = None
      noise_13 = None
      x_67 = x_57 + mul_11
      x_57 = None
      mul_11 = None
      getattr_getattr_l__self___features___5_____0___norm2 = (self.
          getattr_getattr_L__self___features___5_____0___norm2(x_67))
      getattr_getattr_l__self___features___5_____0___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_0(
          getattr_getattr_l__self___features___5_____0___norm2))
      getattr_getattr_l__self___features___5_____0___norm2 = None
      getattr_getattr_l__self___features___5_____0___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_1(
          getattr_getattr_l__self___features___5_____0___mlp_0))
      getattr_getattr_l__self___features___5_____0___mlp_0 = None
      getattr_getattr_l__self___features___5_____0___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_2(
          getattr_getattr_l__self___features___5_____0___mlp_1))
      getattr_getattr_l__self___features___5_____0___mlp_1 = None
      getattr_getattr_l__self___features___5_____0___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_3(
          getattr_getattr_l__self___features___5_____0___mlp_2))
      getattr_getattr_l__self___features___5_____0___mlp_2 = None
      getattr_getattr_l__self___features___5_____0___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____0___mlp_4(
          getattr_getattr_l__self___features___5_____0___mlp_3))
      getattr_getattr_l__self___features___5_____0___mlp_3 = None
      _log_api_usage_once_9 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2027 = []
      __temp_2027.extend((1, 1, 1, 1))
      noise_14 = torch.empty(__temp_2027, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_15 = noise_14.bernoulli_(0.9272727272727272)
      noise_14 = None
      div__7 = noise_15.div_(0.9272727272727272)
      mul_12 = getattr_getattr_l__self___features___5_____0___mlp_4 * noise_15
      getattr_getattr_l__self___features___5_____0___mlp_4 = None
      noise_15 = None
      x_68 = x_67 + mul_12
      x_67 = None
      mul_12 = None
      getattr_getattr_l__self___features___5_____1___norm1 = (self.
          getattr_getattr_L__self___features___5_____1___norm1(x_68))
      (
          getattr_getattr_l__self___features___5_____1___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____1___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____1___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____1___attn_relative_position_index
          )
      relative_position_bias_20 = (
          getattr_getattr_l__self___features___5_____1___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____1___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____1___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____1___attn_relative_position_index
          ) = None
      relative_position_bias_21 = relative_position_bias_20.view(49, 49, -1)
      relative_position_bias_20 = None
      permute_23 = relative_position_bias_21.permute(2, 0, 1)
      relative_position_bias_21 = None
      contiguous_10 = permute_23.contiguous()
      permute_23 = None
      relative_position_bias_23 = contiguous_10.unsqueeze(0)
      contiguous_10 = None
      getattr_getattr_l__self___features___5_____1___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____1___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____1___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____1___attn_proj_weight)
      getattr_getattr_l__self___features___5_____1___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____1___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____1___attn_proj_bias)
      x_69 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____1___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____1___norm1 = None
      x_70 = torch.roll(x_69, shifts=(-3, -3), dims=(1, 2))
      x_69 = None
      x_71 = x_70.view(1, 2, 7, 2, 7, 384)
      x_70 = None
      permute_24 = x_71.permute(0, 1, 3, 2, 4, 5)
      x_71 = None
      x_72 = permute_24.reshape(4, 49, 384)
      permute_24 = None
      qkv_10 = torch._C._nn.linear(x_72,
          getattr_getattr_l__self___features___5_____1___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____1___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____1___attn_qkv_bias = None
      reshape_23 = qkv_10.reshape(4, 49, 3, 12, 32)
      qkv_10 = None
      qkv_11 = reshape_23.permute(2, 0, 3, 1, 4)
      reshape_23 = None
      q_10 = qkv_11[0]
      k_5 = qkv_11[1]
      v_5 = qkv_11[2]
      qkv_11 = None
      q_11 = q_10 * 0.1767766952966369
      q_10 = None
      transpose_10 = k_5.transpose(-2, -1)
      k_5 = None
      attn_26 = q_11.matmul(transpose_10)
      q_11 = None
      transpose_10 = None
      attn_27 = attn_26 + relative_position_bias_23
      attn_26 = None
      relative_position_bias_23 = None
      attn_mask_10 = x_72.new_zeros((14, 14))
      x_72 = None
      attn_mask_10[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem_18 = attn_mask_10
      attn_mask_10[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_19 = attn_mask_10
      attn_mask_10[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_20 = attn_mask_10
      attn_mask_10[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_21 = attn_mask_10
      attn_mask_10[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_22 = attn_mask_10
      attn_mask_10[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_23 = attn_mask_10
      attn_mask_10[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_24 = attn_mask_10
      attn_mask_10[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_25 = attn_mask_10
      attn_mask_10[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_26 = attn_mask_10
      attn_mask_11 = attn_mask_10.view(2, 7, 2, 7)
      attn_mask_10 = None
      permute_26 = attn_mask_11.permute(0, 2, 1, 3)
      attn_mask_11 = None
      attn_mask_12 = permute_26.reshape(4, 49)
      permute_26 = None
      unsqueeze_14 = attn_mask_12.unsqueeze(1)
      unsqueeze_15 = attn_mask_12.unsqueeze(2)
      attn_mask_12 = None
      attn_mask_13 = unsqueeze_14 - unsqueeze_15
      unsqueeze_14 = None
      unsqueeze_15 = None
      ne_2 = attn_mask_13 != 0
      masked_fill_4 = attn_mask_13.masked_fill(ne_2, -100.0)
      ne_2 = None
      eq_2 = attn_mask_13 == 0
      attn_mask_13 = None
      attn_mask_14 = masked_fill_4.masked_fill(eq_2, 0.0)
      masked_fill_4 = None
      eq_2 = None
      attn_28 = attn_27.view(1, 4, 12, 49, 49)
      attn_27 = None
      unsqueeze_16 = attn_mask_14.unsqueeze(1)
      attn_mask_14 = None
      unsqueeze_17 = unsqueeze_16.unsqueeze(0)
      unsqueeze_16 = None
      attn_29 = attn_28 + unsqueeze_17
      attn_28 = None
      unsqueeze_17 = None
      attn_30 = attn_29.view(-1, 12, 49, 49)
      attn_29 = None
      attn_31 = torch.nn.functional.softmax(attn_30, dim=-1)
      attn_30 = None
      attn_32 = torch.nn.functional.dropout(attn_31, p=0.0, training=True)
      attn_31 = None
      matmul_11 = attn_32.matmul(v_5)
      attn_32 = None
      v_5 = None
      transpose_11 = matmul_11.transpose(1, 2)
      matmul_11 = None
      x_73 = transpose_11.reshape(4, 49, 384)
      transpose_11 = None
      x_74 = torch._C._nn.linear(x_73,
          getattr_getattr_l__self___features___5_____1___attn_proj_weight,
          getattr_getattr_l__self___features___5_____1___attn_proj_bias)
      x_73 = None
      getattr_getattr_l__self___features___5_____1___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____1___attn_proj_bias = None
      x_75 = torch.nn.functional.dropout(x_74, p=0.0, training=True)
      x_74 = None
      x_76 = x_75.view(1, 2, 2, 7, 7, 384)
      x_75 = None
      permute_27 = x_76.permute(0, 1, 3, 2, 4, 5)
      x_76 = None
      x_77 = permute_27.reshape(1, 14, 14, 384)
      permute_27 = None
      x_78 = torch.roll(x_77, shifts=(3, 3), dims=(1, 2))
      x_77 = None
      getitem_37 = x_78[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_78 = None
      x_79 = getitem_37.contiguous()
      getitem_37 = None
      _log_api_usage_once_10 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2095 = []
      __temp_2095.extend((1, 1, 1, 1))
      noise_16 = torch.empty(__temp_2095, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_17 = noise_16.bernoulli_(0.9090909090909091)
      noise_16 = None
      div__8 = noise_17.div_(0.9090909090909091)
      mul_14 = x_79 * noise_17
      x_79 = None
      noise_17 = None
      x_80 = x_68 + mul_14
      x_68 = None
      mul_14 = None
      getattr_getattr_l__self___features___5_____1___norm2 = (self.
          getattr_getattr_L__self___features___5_____1___norm2(x_80))
      getattr_getattr_l__self___features___5_____1___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_0(
          getattr_getattr_l__self___features___5_____1___norm2))
      getattr_getattr_l__self___features___5_____1___norm2 = None
      getattr_getattr_l__self___features___5_____1___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_1(
          getattr_getattr_l__self___features___5_____1___mlp_0))
      getattr_getattr_l__self___features___5_____1___mlp_0 = None
      getattr_getattr_l__self___features___5_____1___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_2(
          getattr_getattr_l__self___features___5_____1___mlp_1))
      getattr_getattr_l__self___features___5_____1___mlp_1 = None
      getattr_getattr_l__self___features___5_____1___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_3(
          getattr_getattr_l__self___features___5_____1___mlp_2))
      getattr_getattr_l__self___features___5_____1___mlp_2 = None
      getattr_getattr_l__self___features___5_____1___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____1___mlp_4(
          getattr_getattr_l__self___features___5_____1___mlp_3))
      getattr_getattr_l__self___features___5_____1___mlp_3 = None
      _log_api_usage_once_11 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2108 = []
      __temp_2108.extend((1, 1, 1, 1))
      noise_18 = torch.empty(__temp_2108, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_19 = noise_18.bernoulli_(0.9090909090909091)
      noise_18 = None
      div__9 = noise_19.div_(0.9090909090909091)
      mul_15 = getattr_getattr_l__self___features___5_____1___mlp_4 * noise_19
      getattr_getattr_l__self___features___5_____1___mlp_4 = None
      noise_19 = None
      x_81 = x_80 + mul_15
      x_80 = None
      mul_15 = None
      getattr_getattr_l__self___features___5_____2___norm1 = (self.
          getattr_getattr_L__self___features___5_____2___norm1(x_81))
      (
          getattr_getattr_l__self___features___5_____2___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____2___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____2___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____2___attn_relative_position_index
          )
      relative_position_bias_24 = (
          getattr_getattr_l__self___features___5_____2___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____2___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____2___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____2___attn_relative_position_index
          ) = None
      relative_position_bias_25 = relative_position_bias_24.view(49, 49, -1)
      relative_position_bias_24 = None
      permute_28 = relative_position_bias_25.permute(2, 0, 1)
      relative_position_bias_25 = None
      contiguous_12 = permute_28.contiguous()
      permute_28 = None
      relative_position_bias_27 = contiguous_12.unsqueeze(0)
      contiguous_12 = None
      getattr_getattr_l__self___features___5_____2___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____2___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____2___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____2___attn_proj_weight)
      getattr_getattr_l__self___features___5_____2___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____2___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____2___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____2___attn_proj_bias)
      x_82 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____2___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____2___norm1 = None
      x_83 = x_82.view(1, 2, 7, 2, 7, 384)
      x_82 = None
      permute_29 = x_83.permute(0, 1, 3, 2, 4, 5)
      x_83 = None
      x_84 = permute_29.reshape(4, 49, 384)
      permute_29 = None
      qkv_12 = torch._C._nn.linear(x_84,
          getattr_getattr_l__self___features___5_____2___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____2___attn_qkv_bias)
      x_84 = None
      getattr_getattr_l__self___features___5_____2___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____2___attn_qkv_bias = None
      reshape_28 = qkv_12.reshape(4, 49, 3, 12, 32)
      qkv_12 = None
      qkv_13 = reshape_28.permute(2, 0, 3, 1, 4)
      reshape_28 = None
      q_12 = qkv_13[0]
      k_6 = qkv_13[1]
      v_6 = qkv_13[2]
      qkv_13 = None
      q_13 = q_12 * 0.1767766952966369
      q_12 = None
      transpose_12 = k_6.transpose(-2, -1)
      k_6 = None
      attn_33 = q_13.matmul(transpose_12)
      q_13 = None
      transpose_12 = None
      attn_34 = attn_33 + relative_position_bias_27
      attn_33 = None
      relative_position_bias_27 = None
      attn_35 = torch.nn.functional.softmax(attn_34, dim=-1)
      attn_34 = None
      attn_36 = torch.nn.functional.dropout(attn_35, p=0.0, training=True)
      attn_35 = None
      matmul_13 = attn_36.matmul(v_6)
      attn_36 = None
      v_6 = None
      transpose_13 = matmul_13.transpose(1, 2)
      matmul_13 = None
      x_85 = transpose_13.reshape(4, 49, 384)
      transpose_13 = None
      x_86 = torch._C._nn.linear(x_85,
          getattr_getattr_l__self___features___5_____2___attn_proj_weight,
          getattr_getattr_l__self___features___5_____2___attn_proj_bias)
      x_85 = None
      getattr_getattr_l__self___features___5_____2___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____2___attn_proj_bias = None
      x_87 = torch.nn.functional.dropout(x_86, p=0.0, training=True)
      x_86 = None
      x_88 = x_87.view(1, 2, 2, 7, 7, 384)
      x_87 = None
      permute_31 = x_88.permute(0, 1, 3, 2, 4, 5)
      x_88 = None
      x_89 = permute_31.reshape(1, 14, 14, 384)
      permute_31 = None
      getitem_42 = x_89[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_89 = None
      x_90 = getitem_42.contiguous()
      getitem_42 = None
      _log_api_usage_once_12 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2144 = []
      __temp_2144.extend((1, 1, 1, 1))
      noise_20 = torch.empty(__temp_2144, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_21 = noise_20.bernoulli_(0.8909090909090909)
      noise_20 = None
      div__10 = noise_21.div_(0.8909090909090909)
      mul_17 = x_90 * noise_21
      x_90 = None
      noise_21 = None
      x_91 = x_81 + mul_17
      x_81 = None
      mul_17 = None
      getattr_getattr_l__self___features___5_____2___norm2 = (self.
          getattr_getattr_L__self___features___5_____2___norm2(x_91))
      getattr_getattr_l__self___features___5_____2___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_0(
          getattr_getattr_l__self___features___5_____2___norm2))
      getattr_getattr_l__self___features___5_____2___norm2 = None
      getattr_getattr_l__self___features___5_____2___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_1(
          getattr_getattr_l__self___features___5_____2___mlp_0))
      getattr_getattr_l__self___features___5_____2___mlp_0 = None
      getattr_getattr_l__self___features___5_____2___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_2(
          getattr_getattr_l__self___features___5_____2___mlp_1))
      getattr_getattr_l__self___features___5_____2___mlp_1 = None
      getattr_getattr_l__self___features___5_____2___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_3(
          getattr_getattr_l__self___features___5_____2___mlp_2))
      getattr_getattr_l__self___features___5_____2___mlp_2 = None
      getattr_getattr_l__self___features___5_____2___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____2___mlp_4(
          getattr_getattr_l__self___features___5_____2___mlp_3))
      getattr_getattr_l__self___features___5_____2___mlp_3 = None
      _log_api_usage_once_13 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2157 = []
      __temp_2157.extend((1, 1, 1, 1))
      noise_22 = torch.empty(__temp_2157, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_23 = noise_22.bernoulli_(0.8909090909090909)
      noise_22 = None
      div__11 = noise_23.div_(0.8909090909090909)
      mul_18 = getattr_getattr_l__self___features___5_____2___mlp_4 * noise_23
      getattr_getattr_l__self___features___5_____2___mlp_4 = None
      noise_23 = None
      x_92 = x_91 + mul_18
      x_91 = None
      mul_18 = None
      getattr_getattr_l__self___features___5_____3___norm1 = (self.
          getattr_getattr_L__self___features___5_____3___norm1(x_92))
      (
          getattr_getattr_l__self___features___5_____3___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____3___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____3___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____3___attn_relative_position_index
          )
      relative_position_bias_28 = (
          getattr_getattr_l__self___features___5_____3___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____3___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____3___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____3___attn_relative_position_index
          ) = None
      relative_position_bias_29 = relative_position_bias_28.view(49, 49, -1)
      relative_position_bias_28 = None
      permute_32 = relative_position_bias_29.permute(2, 0, 1)
      relative_position_bias_29 = None
      contiguous_14 = permute_32.contiguous()
      permute_32 = None
      relative_position_bias_31 = contiguous_14.unsqueeze(0)
      contiguous_14 = None
      getattr_getattr_l__self___features___5_____3___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____3___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____3___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____3___attn_proj_weight)
      getattr_getattr_l__self___features___5_____3___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____3___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____3___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____3___attn_proj_bias)
      x_93 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____3___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____3___norm1 = None
      x_94 = torch.roll(x_93, shifts=(-3, -3), dims=(1, 2))
      x_93 = None
      x_95 = x_94.view(1, 2, 7, 2, 7, 384)
      x_94 = None
      permute_33 = x_95.permute(0, 1, 3, 2, 4, 5)
      x_95 = None
      x_96 = permute_33.reshape(4, 49, 384)
      permute_33 = None
      qkv_14 = torch._C._nn.linear(x_96,
          getattr_getattr_l__self___features___5_____3___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____3___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____3___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____3___attn_qkv_bias = None
      reshape_32 = qkv_14.reshape(4, 49, 3, 12, 32)
      qkv_14 = None
      qkv_15 = reshape_32.permute(2, 0, 3, 1, 4)
      reshape_32 = None
      q_14 = qkv_15[0]
      k_7 = qkv_15[1]
      v_7 = qkv_15[2]
      qkv_15 = None
      q_15 = q_14 * 0.1767766952966369
      q_14 = None
      transpose_14 = k_7.transpose(-2, -1)
      k_7 = None
      attn_37 = q_15.matmul(transpose_14)
      q_15 = None
      transpose_14 = None
      attn_38 = attn_37 + relative_position_bias_31
      attn_37 = None
      relative_position_bias_31 = None
      attn_mask_15 = x_96.new_zeros((14, 14))
      x_96 = None
      attn_mask_15[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem_27 = attn_mask_15
      attn_mask_15[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_28 = attn_mask_15
      attn_mask_15[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_29 = attn_mask_15
      attn_mask_15[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_30 = attn_mask_15
      attn_mask_15[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_31 = attn_mask_15
      attn_mask_15[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_32 = attn_mask_15
      attn_mask_15[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_33 = attn_mask_15
      attn_mask_15[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_34 = attn_mask_15
      attn_mask_15[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_35 = attn_mask_15
      attn_mask_16 = attn_mask_15.view(2, 7, 2, 7)
      attn_mask_15 = None
      permute_35 = attn_mask_16.permute(0, 2, 1, 3)
      attn_mask_16 = None
      attn_mask_17 = permute_35.reshape(4, 49)
      permute_35 = None
      unsqueeze_20 = attn_mask_17.unsqueeze(1)
      unsqueeze_21 = attn_mask_17.unsqueeze(2)
      attn_mask_17 = None
      attn_mask_18 = unsqueeze_20 - unsqueeze_21
      unsqueeze_20 = None
      unsqueeze_21 = None
      ne_3 = attn_mask_18 != 0
      masked_fill_6 = attn_mask_18.masked_fill(ne_3, -100.0)
      ne_3 = None
      eq_3 = attn_mask_18 == 0
      attn_mask_18 = None
      attn_mask_19 = masked_fill_6.masked_fill(eq_3, 0.0)
      masked_fill_6 = None
      eq_3 = None
      attn_39 = attn_38.view(1, 4, 12, 49, 49)
      attn_38 = None
      unsqueeze_22 = attn_mask_19.unsqueeze(1)
      attn_mask_19 = None
      unsqueeze_23 = unsqueeze_22.unsqueeze(0)
      unsqueeze_22 = None
      attn_40 = attn_39 + unsqueeze_23
      attn_39 = None
      unsqueeze_23 = None
      attn_41 = attn_40.view(-1, 12, 49, 49)
      attn_40 = None
      attn_42 = torch.nn.functional.softmax(attn_41, dim=-1)
      attn_41 = None
      attn_43 = torch.nn.functional.dropout(attn_42, p=0.0, training=True)
      attn_42 = None
      matmul_15 = attn_43.matmul(v_7)
      attn_43 = None
      v_7 = None
      transpose_15 = matmul_15.transpose(1, 2)
      matmul_15 = None
      x_97 = transpose_15.reshape(4, 49, 384)
      transpose_15 = None
      x_98 = torch._C._nn.linear(x_97,
          getattr_getattr_l__self___features___5_____3___attn_proj_weight,
          getattr_getattr_l__self___features___5_____3___attn_proj_bias)
      x_97 = None
      getattr_getattr_l__self___features___5_____3___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____3___attn_proj_bias = None
      x_99 = torch.nn.functional.dropout(x_98, p=0.0, training=True)
      x_98 = None
      x_100 = x_99.view(1, 2, 2, 7, 7, 384)
      x_99 = None
      permute_36 = x_100.permute(0, 1, 3, 2, 4, 5)
      x_100 = None
      x_101 = permute_36.reshape(1, 14, 14, 384)
      permute_36 = None
      x_102 = torch.roll(x_101, shifts=(3, 3), dims=(1, 2))
      x_101 = None
      getitem_47 = x_102[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_102 = None
      x_103 = getitem_47.contiguous()
      getitem_47 = None
      _log_api_usage_once_14 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2225 = []
      __temp_2225.extend((1, 1, 1, 1))
      noise_24 = torch.empty(__temp_2225, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_25 = noise_24.bernoulli_(0.8727272727272727)
      noise_24 = None
      div__12 = noise_25.div_(0.8727272727272727)
      mul_20 = x_103 * noise_25
      x_103 = None
      noise_25 = None
      x_104 = x_92 + mul_20
      x_92 = None
      mul_20 = None
      getattr_getattr_l__self___features___5_____3___norm2 = (self.
          getattr_getattr_L__self___features___5_____3___norm2(x_104))
      getattr_getattr_l__self___features___5_____3___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_0(
          getattr_getattr_l__self___features___5_____3___norm2))
      getattr_getattr_l__self___features___5_____3___norm2 = None
      getattr_getattr_l__self___features___5_____3___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_1(
          getattr_getattr_l__self___features___5_____3___mlp_0))
      getattr_getattr_l__self___features___5_____3___mlp_0 = None
      getattr_getattr_l__self___features___5_____3___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_2(
          getattr_getattr_l__self___features___5_____3___mlp_1))
      getattr_getattr_l__self___features___5_____3___mlp_1 = None
      getattr_getattr_l__self___features___5_____3___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_3(
          getattr_getattr_l__self___features___5_____3___mlp_2))
      getattr_getattr_l__self___features___5_____3___mlp_2 = None
      getattr_getattr_l__self___features___5_____3___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____3___mlp_4(
          getattr_getattr_l__self___features___5_____3___mlp_3))
      getattr_getattr_l__self___features___5_____3___mlp_3 = None
      _log_api_usage_once_15 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2238 = []
      __temp_2238.extend((1, 1, 1, 1))
      noise_26 = torch.empty(__temp_2238, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_27 = noise_26.bernoulli_(0.8727272727272727)
      noise_26 = None
      div__13 = noise_27.div_(0.8727272727272727)
      mul_21 = getattr_getattr_l__self___features___5_____3___mlp_4 * noise_27
      getattr_getattr_l__self___features___5_____3___mlp_4 = None
      noise_27 = None
      x_105 = x_104 + mul_21
      x_104 = None
      mul_21 = None
      getattr_getattr_l__self___features___5_____4___norm1 = (self.
          getattr_getattr_L__self___features___5_____4___norm1(x_105))
      (
          getattr_getattr_l__self___features___5_____4___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____4___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____4___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____4___attn_relative_position_index
          )
      relative_position_bias_32 = (
          getattr_getattr_l__self___features___5_____4___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____4___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____4___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____4___attn_relative_position_index
          ) = None
      relative_position_bias_33 = relative_position_bias_32.view(49, 49, -1)
      relative_position_bias_32 = None
      permute_37 = relative_position_bias_33.permute(2, 0, 1)
      relative_position_bias_33 = None
      contiguous_16 = permute_37.contiguous()
      permute_37 = None
      relative_position_bias_35 = contiguous_16.unsqueeze(0)
      contiguous_16 = None
      getattr_getattr_l__self___features___5_____4___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____4___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____4___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____4___attn_proj_weight)
      getattr_getattr_l__self___features___5_____4___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____4___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____4___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____4___attn_proj_bias)
      x_106 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____4___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____4___norm1 = None
      x_107 = x_106.view(1, 2, 7, 2, 7, 384)
      x_106 = None
      permute_38 = x_107.permute(0, 1, 3, 2, 4, 5)
      x_107 = None
      x_108 = permute_38.reshape(4, 49, 384)
      permute_38 = None
      qkv_16 = torch._C._nn.linear(x_108,
          getattr_getattr_l__self___features___5_____4___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____4___attn_qkv_bias)
      x_108 = None
      getattr_getattr_l__self___features___5_____4___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____4___attn_qkv_bias = None
      reshape_37 = qkv_16.reshape(4, 49, 3, 12, 32)
      qkv_16 = None
      qkv_17 = reshape_37.permute(2, 0, 3, 1, 4)
      reshape_37 = None
      q_16 = qkv_17[0]
      k_8 = qkv_17[1]
      v_8 = qkv_17[2]
      qkv_17 = None
      q_17 = q_16 * 0.1767766952966369
      q_16 = None
      transpose_16 = k_8.transpose(-2, -1)
      k_8 = None
      attn_44 = q_17.matmul(transpose_16)
      q_17 = None
      transpose_16 = None
      attn_45 = attn_44 + relative_position_bias_35
      attn_44 = None
      relative_position_bias_35 = None
      attn_46 = torch.nn.functional.softmax(attn_45, dim=-1)
      attn_45 = None
      attn_47 = torch.nn.functional.dropout(attn_46, p=0.0, training=True)
      attn_46 = None
      matmul_17 = attn_47.matmul(v_8)
      attn_47 = None
      v_8 = None
      transpose_17 = matmul_17.transpose(1, 2)
      matmul_17 = None
      x_109 = transpose_17.reshape(4, 49, 384)
      transpose_17 = None
      x_110 = torch._C._nn.linear(x_109,
          getattr_getattr_l__self___features___5_____4___attn_proj_weight,
          getattr_getattr_l__self___features___5_____4___attn_proj_bias)
      x_109 = None
      getattr_getattr_l__self___features___5_____4___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____4___attn_proj_bias = None
      x_111 = torch.nn.functional.dropout(x_110, p=0.0, training=True)
      x_110 = None
      x_112 = x_111.view(1, 2, 2, 7, 7, 384)
      x_111 = None
      permute_40 = x_112.permute(0, 1, 3, 2, 4, 5)
      x_112 = None
      x_113 = permute_40.reshape(1, 14, 14, 384)
      permute_40 = None
      getitem_52 = x_113[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_113 = None
      x_114 = getitem_52.contiguous()
      getitem_52 = None
      _log_api_usage_once_16 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2274 = []
      __temp_2274.extend((1, 1, 1, 1))
      noise_28 = torch.empty(__temp_2274, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_29 = noise_28.bernoulli_(0.8545454545454545)
      noise_28 = None
      div__14 = noise_29.div_(0.8545454545454545)
      mul_23 = x_114 * noise_29
      x_114 = None
      noise_29 = None
      x_115 = x_105 + mul_23
      x_105 = None
      mul_23 = None
      getattr_getattr_l__self___features___5_____4___norm2 = (self.
          getattr_getattr_L__self___features___5_____4___norm2(x_115))
      getattr_getattr_l__self___features___5_____4___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_0(
          getattr_getattr_l__self___features___5_____4___norm2))
      getattr_getattr_l__self___features___5_____4___norm2 = None
      getattr_getattr_l__self___features___5_____4___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_1(
          getattr_getattr_l__self___features___5_____4___mlp_0))
      getattr_getattr_l__self___features___5_____4___mlp_0 = None
      getattr_getattr_l__self___features___5_____4___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_2(
          getattr_getattr_l__self___features___5_____4___mlp_1))
      getattr_getattr_l__self___features___5_____4___mlp_1 = None
      getattr_getattr_l__self___features___5_____4___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_3(
          getattr_getattr_l__self___features___5_____4___mlp_2))
      getattr_getattr_l__self___features___5_____4___mlp_2 = None
      getattr_getattr_l__self___features___5_____4___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____4___mlp_4(
          getattr_getattr_l__self___features___5_____4___mlp_3))
      getattr_getattr_l__self___features___5_____4___mlp_3 = None
      _log_api_usage_once_17 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2287 = []
      __temp_2287.extend((1, 1, 1, 1))
      noise_30 = torch.empty(__temp_2287, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_31 = noise_30.bernoulli_(0.8545454545454545)
      noise_30 = None
      div__15 = noise_31.div_(0.8545454545454545)
      mul_24 = getattr_getattr_l__self___features___5_____4___mlp_4 * noise_31
      getattr_getattr_l__self___features___5_____4___mlp_4 = None
      noise_31 = None
      x_116 = x_115 + mul_24
      x_115 = None
      mul_24 = None
      getattr_getattr_l__self___features___5_____5___norm1 = (self.
          getattr_getattr_L__self___features___5_____5___norm1(x_116))
      (
          getattr_getattr_l__self___features___5_____5___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___5_____5___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___5_____5___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___5_____5___attn_relative_position_index
          )
      relative_position_bias_36 = (
          getattr_getattr_l__self___features___5_____5___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___5_____5___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___5_____5___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___5_____5___attn_relative_position_index
          ) = None
      relative_position_bias_37 = relative_position_bias_36.view(49, 49, -1)
      relative_position_bias_36 = None
      permute_41 = relative_position_bias_37.permute(2, 0, 1)
      relative_position_bias_37 = None
      contiguous_18 = permute_41.contiguous()
      permute_41 = None
      relative_position_bias_39 = contiguous_18.unsqueeze(0)
      contiguous_18 = None
      getattr_getattr_l__self___features___5_____5___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___5_____5___attn_qkv_weight)
      getattr_getattr_l__self___features___5_____5___attn_proj_weight = (self.
          getattr_getattr_L__self___features___5_____5___attn_proj_weight)
      getattr_getattr_l__self___features___5_____5___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___5_____5___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____5___attn_proj_bias = (self.
          getattr_getattr_L__self___features___5_____5___attn_proj_bias)
      x_117 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___5_____5___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___5_____5___norm1 = None
      x_118 = torch.roll(x_117, shifts=(-3, -3), dims=(1, 2))
      x_117 = None
      x_119 = x_118.view(1, 2, 7, 2, 7, 384)
      x_118 = None
      permute_42 = x_119.permute(0, 1, 3, 2, 4, 5)
      x_119 = None
      x_120 = permute_42.reshape(4, 49, 384)
      permute_42 = None
      qkv_18 = torch._C._nn.linear(x_120,
          getattr_getattr_l__self___features___5_____5___attn_qkv_weight,
          getattr_getattr_l__self___features___5_____5___attn_qkv_bias)
      getattr_getattr_l__self___features___5_____5___attn_qkv_weight = None
      getattr_getattr_l__self___features___5_____5___attn_qkv_bias = None
      reshape_41 = qkv_18.reshape(4, 49, 3, 12, 32)
      qkv_18 = None
      qkv_19 = reshape_41.permute(2, 0, 3, 1, 4)
      reshape_41 = None
      q_18 = qkv_19[0]
      k_9 = qkv_19[1]
      v_9 = qkv_19[2]
      qkv_19 = None
      q_19 = q_18 * 0.1767766952966369
      q_18 = None
      transpose_18 = k_9.transpose(-2, -1)
      k_9 = None
      attn_48 = q_19.matmul(transpose_18)
      q_19 = None
      transpose_18 = None
      attn_49 = attn_48 + relative_position_bias_39
      attn_48 = None
      relative_position_bias_39 = None
      attn_mask_20 = x_120.new_zeros((14, 14))
      x_120 = None
      attn_mask_20[slice(0, -7, None), slice(0, -7, None)] = 0
      setitem_36 = attn_mask_20
      attn_mask_20[slice(0, -7, None), slice(-7, -3, None)] = 1
      setitem_37 = attn_mask_20
      attn_mask_20[slice(0, -7, None), slice(-3, None, None)] = 2
      setitem_38 = attn_mask_20
      attn_mask_20[slice(-7, -3, None), slice(0, -7, None)] = 3
      setitem_39 = attn_mask_20
      attn_mask_20[slice(-7, -3, None), slice(-7, -3, None)] = 4
      setitem_40 = attn_mask_20
      attn_mask_20[slice(-7, -3, None), slice(-3, None, None)] = 5
      setitem_41 = attn_mask_20
      attn_mask_20[slice(-3, None, None), slice(0, -7, None)] = 6
      setitem_42 = attn_mask_20
      attn_mask_20[slice(-3, None, None), slice(-7, -3, None)] = 7
      setitem_43 = attn_mask_20
      attn_mask_20[slice(-3, None, None), slice(-3, None, None)] = 8
      setitem_44 = attn_mask_20
      attn_mask_21 = attn_mask_20.view(2, 7, 2, 7)
      attn_mask_20 = None
      permute_44 = attn_mask_21.permute(0, 2, 1, 3)
      attn_mask_21 = None
      attn_mask_22 = permute_44.reshape(4, 49)
      permute_44 = None
      unsqueeze_26 = attn_mask_22.unsqueeze(1)
      unsqueeze_27 = attn_mask_22.unsqueeze(2)
      attn_mask_22 = None
      attn_mask_23 = unsqueeze_26 - unsqueeze_27
      unsqueeze_26 = None
      unsqueeze_27 = None
      ne_4 = attn_mask_23 != 0
      masked_fill_8 = attn_mask_23.masked_fill(ne_4, -100.0)
      ne_4 = None
      eq_4 = attn_mask_23 == 0
      attn_mask_23 = None
      attn_mask_24 = masked_fill_8.masked_fill(eq_4, 0.0)
      masked_fill_8 = None
      eq_4 = None
      attn_50 = attn_49.view(1, 4, 12, 49, 49)
      attn_49 = None
      unsqueeze_28 = attn_mask_24.unsqueeze(1)
      attn_mask_24 = None
      unsqueeze_29 = unsqueeze_28.unsqueeze(0)
      unsqueeze_28 = None
      attn_51 = attn_50 + unsqueeze_29
      attn_50 = None
      unsqueeze_29 = None
      attn_52 = attn_51.view(-1, 12, 49, 49)
      attn_51 = None
      attn_53 = torch.nn.functional.softmax(attn_52, dim=-1)
      attn_52 = None
      attn_54 = torch.nn.functional.dropout(attn_53, p=0.0, training=True)
      attn_53 = None
      matmul_19 = attn_54.matmul(v_9)
      attn_54 = None
      v_9 = None
      transpose_19 = matmul_19.transpose(1, 2)
      matmul_19 = None
      x_121 = transpose_19.reshape(4, 49, 384)
      transpose_19 = None
      x_122 = torch._C._nn.linear(x_121,
          getattr_getattr_l__self___features___5_____5___attn_proj_weight,
          getattr_getattr_l__self___features___5_____5___attn_proj_bias)
      x_121 = None
      getattr_getattr_l__self___features___5_____5___attn_proj_weight = None
      getattr_getattr_l__self___features___5_____5___attn_proj_bias = None
      x_123 = torch.nn.functional.dropout(x_122, p=0.0, training=True)
      x_122 = None
      x_124 = x_123.view(1, 2, 2, 7, 7, 384)
      x_123 = None
      permute_45 = x_124.permute(0, 1, 3, 2, 4, 5)
      x_124 = None
      x_125 = permute_45.reshape(1, 14, 14, 384)
      permute_45 = None
      x_126 = torch.roll(x_125, shifts=(3, 3), dims=(1, 2))
      x_125 = None
      getitem_57 = x_126[slice(None, None, None), slice(None, 14, None), slice(
          None, 14, None), slice(None, None, None)]
      x_126 = None
      x_127 = getitem_57.contiguous()
      getitem_57 = None
      _log_api_usage_once_18 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2355 = []
      __temp_2355.extend((1, 1, 1, 1))
      noise_32 = torch.empty(__temp_2355, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_33 = noise_32.bernoulli_(0.8363636363636364)
      noise_32 = None
      div__16 = noise_33.div_(0.8363636363636364)
      mul_26 = x_127 * noise_33
      x_127 = None
      noise_33 = None
      x_128 = x_116 + mul_26
      x_116 = None
      mul_26 = None
      getattr_getattr_l__self___features___5_____5___norm2 = (self.
          getattr_getattr_L__self___features___5_____5___norm2(x_128))
      getattr_getattr_l__self___features___5_____5___mlp_0 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_0(
          getattr_getattr_l__self___features___5_____5___norm2))
      getattr_getattr_l__self___features___5_____5___norm2 = None
      getattr_getattr_l__self___features___5_____5___mlp_1 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_1(
          getattr_getattr_l__self___features___5_____5___mlp_0))
      getattr_getattr_l__self___features___5_____5___mlp_0 = None
      getattr_getattr_l__self___features___5_____5___mlp_2 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_2(
          getattr_getattr_l__self___features___5_____5___mlp_1))
      getattr_getattr_l__self___features___5_____5___mlp_1 = None
      getattr_getattr_l__self___features___5_____5___mlp_3 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_3(
          getattr_getattr_l__self___features___5_____5___mlp_2))
      getattr_getattr_l__self___features___5_____5___mlp_2 = None
      getattr_getattr_l__self___features___5_____5___mlp_4 = (self.
          getattr_getattr_L__self___features___5_____5___mlp_4(
          getattr_getattr_l__self___features___5_____5___mlp_3))
      getattr_getattr_l__self___features___5_____5___mlp_3 = None
      _log_api_usage_once_19 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2368 = []
      __temp_2368.extend((1, 1, 1, 1))
      noise_34 = torch.empty(__temp_2368, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_35 = noise_34.bernoulli_(0.8363636363636364)
      noise_34 = None
      div__17 = noise_35.div_(0.8363636363636364)
      mul_27 = getattr_getattr_l__self___features___5_____5___mlp_4 * noise_35
      getattr_getattr_l__self___features___5_____5___mlp_4 = None
      noise_35 = None
      x_129 = x_128 + mul_27
      x_128 = None
      mul_27 = None
      x_130 = torch.nn.functional.pad(x_129, (0, 0, 0, 0, 0, 0))
      x_129 = None
      x0_2 = x_130[Ellipsis, slice(0, None, 2), slice(0, None, 2), slice(None,
          None, None)]
      x1_2 = x_130[Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None,
          None, None)]
      x2_2 = x_130[Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None,
          None, None)]
      x3_2 = x_130[Ellipsis, slice(1, None, 2), slice(1, None, 2), slice(None,
          None, None)]
      x_130 = None
      x_132 = torch.cat([x0_2, x1_2, x2_2, x3_2], -1)
      x0_2 = None
      x1_2 = None
      x2_2 = None
      x3_2 = None
      x_133 = self.getattr_L__self___features___6___norm(x_132)
      x_132 = None
      x_134 = self.getattr_L__self___features___6___reduction(x_133)
      x_133 = None
      getattr_getattr_l__self___features___7_____0___norm1 = (self.
          getattr_getattr_L__self___features___7_____0___norm1(x_134))
      (
          getattr_getattr_l__self___features___7_____0___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___7_____0___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___7_____0___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___7_____0___attn_relative_position_index
          )
      relative_position_bias_40 = (
          getattr_getattr_l__self___features___7_____0___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___7_____0___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___7_____0___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___7_____0___attn_relative_position_index
          ) = None
      relative_position_bias_41 = relative_position_bias_40.view(49, 49, -1)
      relative_position_bias_40 = None
      permute_46 = relative_position_bias_41.permute(2, 0, 1)
      relative_position_bias_41 = None
      contiguous_20 = permute_46.contiguous()
      permute_46 = None
      relative_position_bias_43 = contiguous_20.unsqueeze(0)
      contiguous_20 = None
      getattr_getattr_l__self___features___7_____0___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___7_____0___attn_qkv_weight)
      getattr_getattr_l__self___features___7_____0___attn_proj_weight = (self.
          getattr_getattr_L__self___features___7_____0___attn_proj_weight)
      getattr_getattr_l__self___features___7_____0___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___7_____0___attn_qkv_bias)
      getattr_getattr_l__self___features___7_____0___attn_proj_bias = (self.
          getattr_getattr_L__self___features___7_____0___attn_proj_bias)
      x_135 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___7_____0___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___7_____0___norm1 = None
      x_136 = x_135.view(1, 1, 7, 1, 7, 768)
      x_135 = None
      permute_47 = x_136.permute(0, 1, 3, 2, 4, 5)
      x_136 = None
      x_137 = permute_47.reshape(1, 49, 768)
      permute_47 = None
      qkv_20 = torch._C._nn.linear(x_137,
          getattr_getattr_l__self___features___7_____0___attn_qkv_weight,
          getattr_getattr_l__self___features___7_____0___attn_qkv_bias)
      x_137 = None
      getattr_getattr_l__self___features___7_____0___attn_qkv_weight = None
      getattr_getattr_l__self___features___7_____0___attn_qkv_bias = None
      reshape_46 = qkv_20.reshape(1, 49, 3, 24, 32)
      qkv_20 = None
      qkv_21 = reshape_46.permute(2, 0, 3, 1, 4)
      reshape_46 = None
      q_20 = qkv_21[0]
      k_10 = qkv_21[1]
      v_10 = qkv_21[2]
      qkv_21 = None
      q_21 = q_20 * 0.1767766952966369
      q_20 = None
      transpose_20 = k_10.transpose(-2, -1)
      k_10 = None
      attn_55 = q_21.matmul(transpose_20)
      q_21 = None
      transpose_20 = None
      attn_56 = attn_55 + relative_position_bias_43
      attn_55 = None
      relative_position_bias_43 = None
      attn_57 = torch.nn.functional.softmax(attn_56, dim=-1)
      attn_56 = None
      attn_58 = torch.nn.functional.dropout(attn_57, p=0.0, training=True)
      attn_57 = None
      matmul_21 = attn_58.matmul(v_10)
      attn_58 = None
      v_10 = None
      transpose_21 = matmul_21.transpose(1, 2)
      matmul_21 = None
      x_138 = transpose_21.reshape(1, 49, 768)
      transpose_21 = None
      x_139 = torch._C._nn.linear(x_138,
          getattr_getattr_l__self___features___7_____0___attn_proj_weight,
          getattr_getattr_l__self___features___7_____0___attn_proj_bias)
      x_138 = None
      getattr_getattr_l__self___features___7_____0___attn_proj_weight = None
      getattr_getattr_l__self___features___7_____0___attn_proj_bias = None
      x_140 = torch.nn.functional.dropout(x_139, p=0.0, training=True)
      x_139 = None
      x_141 = x_140.view(1, 1, 1, 7, 7, 768)
      x_140 = None
      permute_49 = x_141.permute(0, 1, 3, 2, 4, 5)
      x_141 = None
      x_142 = permute_49.reshape(1, 7, 7, 768)
      permute_49 = None
      getitem_66 = x_142[slice(None, None, None), slice(None, 7, None), slice(
          None, 7, None), slice(None, None, None)]
      x_142 = None
      x_143 = getitem_66.contiguous()
      getitem_66 = None
      _log_api_usage_once_20 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2421 = []
      __temp_2421.extend((1, 1, 1, 1))
      noise_36 = torch.empty(__temp_2421, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_37 = noise_36.bernoulli_(0.8181818181818181)
      noise_36 = None
      div__18 = noise_37.div_(0.8181818181818181)
      mul_29 = x_143 * noise_37
      x_143 = None
      noise_37 = None
      x_144 = x_134 + mul_29
      x_134 = None
      mul_29 = None
      getattr_getattr_l__self___features___7_____0___norm2 = (self.
          getattr_getattr_L__self___features___7_____0___norm2(x_144))
      getattr_getattr_l__self___features___7_____0___mlp_0 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_0(
          getattr_getattr_l__self___features___7_____0___norm2))
      getattr_getattr_l__self___features___7_____0___norm2 = None
      getattr_getattr_l__self___features___7_____0___mlp_1 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_1(
          getattr_getattr_l__self___features___7_____0___mlp_0))
      getattr_getattr_l__self___features___7_____0___mlp_0 = None
      getattr_getattr_l__self___features___7_____0___mlp_2 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_2(
          getattr_getattr_l__self___features___7_____0___mlp_1))
      getattr_getattr_l__self___features___7_____0___mlp_1 = None
      getattr_getattr_l__self___features___7_____0___mlp_3 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_3(
          getattr_getattr_l__self___features___7_____0___mlp_2))
      getattr_getattr_l__self___features___7_____0___mlp_2 = None
      getattr_getattr_l__self___features___7_____0___mlp_4 = (self.
          getattr_getattr_L__self___features___7_____0___mlp_4(
          getattr_getattr_l__self___features___7_____0___mlp_3))
      getattr_getattr_l__self___features___7_____0___mlp_3 = None
      _log_api_usage_once_21 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2434 = []
      __temp_2434.extend((1, 1, 1, 1))
      noise_38 = torch.empty(__temp_2434, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_39 = noise_38.bernoulli_(0.8181818181818181)
      noise_38 = None
      div__19 = noise_39.div_(0.8181818181818181)
      mul_30 = getattr_getattr_l__self___features___7_____0___mlp_4 * noise_39
      getattr_getattr_l__self___features___7_____0___mlp_4 = None
      noise_39 = None
      x_145 = x_144 + mul_30
      x_144 = None
      mul_30 = None
      getattr_getattr_l__self___features___7_____1___norm1 = (self.
          getattr_getattr_L__self___features___7_____1___norm1(x_145))
      (
          getattr_getattr_l__self___features___7_____1___attn_relative_position_bias_table
          ) = (self.
          getattr_getattr_L__self___features___7_____1___attn_relative_position_bias_table
          )
      (getattr_getattr_l__self___features___7_____1___attn_relative_position_index
          ) = (self.
          getattr_getattr_L__self___features___7_____1___attn_relative_position_index
          )
      relative_position_bias_44 = (
          getattr_getattr_l__self___features___7_____1___attn_relative_position_bias_table
          [
          getattr_getattr_l__self___features___7_____1___attn_relative_position_index
          ])
      (
          getattr_getattr_l__self___features___7_____1___attn_relative_position_bias_table
          ) = None
      (getattr_getattr_l__self___features___7_____1___attn_relative_position_index
          ) = None
      relative_position_bias_45 = relative_position_bias_44.view(49, 49, -1)
      relative_position_bias_44 = None
      permute_50 = relative_position_bias_45.permute(2, 0, 1)
      relative_position_bias_45 = None
      contiguous_22 = permute_50.contiguous()
      permute_50 = None
      relative_position_bias_47 = contiguous_22.unsqueeze(0)
      contiguous_22 = None
      getattr_getattr_l__self___features___7_____1___attn_qkv_weight = (self.
          getattr_getattr_L__self___features___7_____1___attn_qkv_weight)
      getattr_getattr_l__self___features___7_____1___attn_proj_weight = (self.
          getattr_getattr_L__self___features___7_____1___attn_proj_weight)
      getattr_getattr_l__self___features___7_____1___attn_qkv_bias = (self.
          getattr_getattr_L__self___features___7_____1___attn_qkv_bias)
      getattr_getattr_l__self___features___7_____1___attn_proj_bias = (self.
          getattr_getattr_L__self___features___7_____1___attn_proj_bias)
      x_146 = torch.nn.functional.pad(
          getattr_getattr_l__self___features___7_____1___norm1, (0, 0, 0, 0, 0, 0))
      getattr_getattr_l__self___features___7_____1___norm1 = None
      x_147 = x_146.view(1, 1, 7, 1, 7, 768)
      x_146 = None
      permute_51 = x_147.permute(0, 1, 3, 2, 4, 5)
      x_147 = None
      x_148 = permute_51.reshape(1, 49, 768)
      permute_51 = None
      qkv_22 = torch._C._nn.linear(x_148,
          getattr_getattr_l__self___features___7_____1___attn_qkv_weight,
          getattr_getattr_l__self___features___7_____1___attn_qkv_bias)
      x_148 = None
      getattr_getattr_l__self___features___7_____1___attn_qkv_weight = None
      getattr_getattr_l__self___features___7_____1___attn_qkv_bias = None
      reshape_50 = qkv_22.reshape(1, 49, 3, 24, 32)
      qkv_22 = None
      qkv_23 = reshape_50.permute(2, 0, 3, 1, 4)
      reshape_50 = None
      q_22 = qkv_23[0]
      k_11 = qkv_23[1]
      v_11 = qkv_23[2]
      qkv_23 = None
      q_23 = q_22 * 0.1767766952966369
      q_22 = None
      transpose_22 = k_11.transpose(-2, -1)
      k_11 = None
      attn_59 = q_23.matmul(transpose_22)
      q_23 = None
      transpose_22 = None
      attn_60 = attn_59 + relative_position_bias_47
      attn_59 = None
      relative_position_bias_47 = None
      attn_61 = torch.nn.functional.softmax(attn_60, dim=-1)
      attn_60 = None
      attn_62 = torch.nn.functional.dropout(attn_61, p=0.0, training=True)
      attn_61 = None
      matmul_23 = attn_62.matmul(v_11)
      attn_62 = None
      v_11 = None
      transpose_23 = matmul_23.transpose(1, 2)
      matmul_23 = None
      x_149 = transpose_23.reshape(1, 49, 768)
      transpose_23 = None
      x_150 = torch._C._nn.linear(x_149,
          getattr_getattr_l__self___features___7_____1___attn_proj_weight,
          getattr_getattr_l__self___features___7_____1___attn_proj_bias)
      x_149 = None
      getattr_getattr_l__self___features___7_____1___attn_proj_weight = None
      getattr_getattr_l__self___features___7_____1___attn_proj_bias = None
      x_151 = torch.nn.functional.dropout(x_150, p=0.0, training=True)
      x_150 = None
      x_152 = x_151.view(1, 1, 1, 7, 7, 768)
      x_151 = None
      permute_53 = x_152.permute(0, 1, 3, 2, 4, 5)
      x_152 = None
      x_153 = permute_53.reshape(1, 7, 7, 768)
      permute_53 = None
      getitem_71 = x_153[slice(None, None, None), slice(None, 7, None), slice(
          None, 7, None), slice(None, None, None)]
      x_153 = None
      x_154 = getitem_71.contiguous()
      getitem_71 = None
      _log_api_usage_once_22 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2470 = []
      __temp_2470.extend((1, 1, 1, 1))
      noise_40 = torch.empty(__temp_2470, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_41 = noise_40.bernoulli_(0.8)
      noise_40 = None
      div__20 = noise_41.div_(0.8)
      mul_32 = x_154 * noise_41
      x_154 = None
      noise_41 = None
      x_155 = x_145 + mul_32
      x_145 = None
      mul_32 = None
      getattr_getattr_l__self___features___7_____1___norm2 = (self.
          getattr_getattr_L__self___features___7_____1___norm2(x_155))
      getattr_getattr_l__self___features___7_____1___mlp_0 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_0(
          getattr_getattr_l__self___features___7_____1___norm2))
      getattr_getattr_l__self___features___7_____1___norm2 = None
      getattr_getattr_l__self___features___7_____1___mlp_1 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_1(
          getattr_getattr_l__self___features___7_____1___mlp_0))
      getattr_getattr_l__self___features___7_____1___mlp_0 = None
      getattr_getattr_l__self___features___7_____1___mlp_2 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_2(
          getattr_getattr_l__self___features___7_____1___mlp_1))
      getattr_getattr_l__self___features___7_____1___mlp_1 = None
      getattr_getattr_l__self___features___7_____1___mlp_3 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_3(
          getattr_getattr_l__self___features___7_____1___mlp_2))
      getattr_getattr_l__self___features___7_____1___mlp_2 = None
      getattr_getattr_l__self___features___7_____1___mlp_4 = (self.
          getattr_getattr_L__self___features___7_____1___mlp_4(
          getattr_getattr_l__self___features___7_____1___mlp_3))
      getattr_getattr_l__self___features___7_____1___mlp_3 = None
      _log_api_usage_once_23 = torch._C._log_api_usage_once(
          'torchvision.ops.stochastic_depth.stochastic_depth')
      __temp_2483 = []
      __temp_2483.extend((1, 1, 1, 1))
      noise_42 = torch.empty(__temp_2483, dtype=torch.float32, device=device(type
          ='cpu'))
      noise_43 = noise_42.bernoulli_(0.8)
      noise_42 = None
      div__21 = noise_43.div_(0.8)
      mul_33 = getattr_getattr_l__self___features___7_____1___mlp_4 * noise_43
      getattr_getattr_l__self___features___7_____1___mlp_4 = None
      noise_43 = None
      x_157 = x_155 + mul_33
      x_155 = None
      mul_33 = None
      x_158 = self.L__self___norm(x_157)
      x_157 = None
      __temp_2490 = []
      __temp_2490.extend((0, 3, 1, 2))
      x_159 = torch.permute(x_158, __temp_2490)
      x_158 = None
      x_160 = self.L__self___avgpool(x_159)
      x_159 = None
      x_161 = self.L__self___flatten(x_160)
      x_160 = None
      x_162 = self.L__self___head(x_161)
      x_161 = None
      return x_162,

  ```
</details>
